mirror of https://gitee.com/openkylin/linux.git
drm pull request for 4.21-rc1
-----BEGIN PGP SIGNATURE----- iQIcBAABAgAGBQJcExwOAAoJEAx081l5xIa+euIP/1NZZvSB+bsCtOwDG8I6uWsS OU5JUZ8q2dqyyFagRxzlkeSt3uWJqKp5NyNwuc9z/5u6AGF+3/97D0J1lG6Os/st 4abF6NadivYJ4cXhJ1ddIHOFMVDcAsyMWNDb93NwPwncCsQ0jt5FFOsrCyj6BGY+ ihHFlHrIyDrbBGDHz+u1E/EO5WkNnaLDoC+/k2fTRWCNI3bQL3O+orsYTI6S2uvU lQJnRfYAllgLD2p1k/rrBHcHXBv50roR0e8uhGmbdhGdp5bEW30UGBLHXxQjjSVy fQCwFwTO8X6zoxU53Zbbk+MVrp+jkTHcGKViHRuLkaHzE5mX26UXDwlXdN32ZUbK yHOJp+uDaWXX7MIz0LsB9Iqj2+eIUoFaIJMoZTMGVTNvqnTxKnoHnjAtbTH2u258 teFgmy4BIgPgo2kwEnBEZjCapou0Eivyut2wq8bTAB2Fe8LwURJpr3cioTtMLlUO L5/PoD27eFvBCAeFrQIwF3b2XiQEnBpXocmilEwP1xDMPgoyeePAfIF2iEpDvi0U jce3rLd2yVvo92xYUgoHkVTD8si/pKKnZ1D0U3+RI6pxK6s0HJEHjcNEMdvdm+2S 4qgvBQV3wlWFkXEK8PR5BHPoLntg18tKon/BTLBjgGkN9E1o9fWs1/s6KQGY4xdo l3Vvfx2LTdkgEoBssSwB =wh4W -----END PGP SIGNATURE----- Merge tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm Pull drm updates from Dave Airlie: "Core: - shared fencing staging removal - drop transactional atomic helpers and move helpers to new location - DP/MST atomic cleanup - Leasing cleanups and drop EXPORT_SYMBOL - Convert drivers to atomic helpers and generic fbdev. - removed deprecated obj_ref/unref in favour of get/put - Improve dumb callback documentation - MODESET_LOCK_BEGIN/END helpers panels: - CDTech panels, Banana Pi Panel, DLC1010GIG, - Olimex LCD-O-LinuXino, Samsung S6D16D0, Truly NT35597 WQXGA, - Himax HX8357D, simulated RTSM AEMv8. - GPD Win2 panel - AUO G101EVN010 vgem: - render node support ttm: - move global init out of drivers - fix LRU handling for ghost objects - Support for simultaneous submissions to multiple engines scheduler: - timeout/fault handling changes to help GPU recovery - helpers for hw with preemption support i915: - Scaler/Watermark fixes - DP MST + powerwell fixes - PSR fixes - Break long get/put shmemfs pages - Icelake fixes - Icelake DSI video mode enablement - Engine workaround improvements amdgpu: - freesync support - GPU reset enabled on CI, VI, SOC15 dGPUs - ABM support in DC - KFD support for vega12/polaris12 - SDMA paging queue on vega - More amdkfd code sharing - DCC scanout on GFX9 - DC kerneldoc - Updated SMU firmware for GFX8 chips - XGMI PSP + hive reset support - GPU reset - DC trace support - Powerplay updates for newer Polaris - Cursor plane update fast path - kfd dma-buf support virtio-gpu: - add EDID support vmwgfx: - pageflip with damage support nouveau: - Initial Turing TU104/TU106 modesetting support msm: - a2xx gpu support for apq8060 and imx5 - a2xx gpummu support - mdp4 display support for apq8060 - DPU fixes and cleanups - enhanced profiling support - debug object naming interface - get_iova/page pinning decoupling tegra: - Tegra194 host1x, VIC and display support enabled - Audio over HDMI for Tegra186 and Tegra194 exynos: - DMA/IOMMU refactoring - plane alpha + blend mode support - Color format fixes for mixer driver rcar-du: - R8A7744 and R8A77470 support - R8A77965 LVDS support imx: - fbdev emulation fix - multi-tiled scalling fixes - SPDX identifiers rockchip - dw_hdmi support - dw-mipi-dsi + dual dsi support - mailbox read size fix qxl: - fix cursor pinning vc4: - YUV support (scaling + cursor) v3d: - enable TFU (Texture Formatting Unit) mali-dp: - add support for linear tiled formats sun4i: - Display Engine 3 support - H6 DE3 mixer 0 support - H6 display engine support - dw-hdmi support - H6 HDMI phy support - implicit fence waiting - BGRX8888 support meson: - Overlay plane support - implicit fence waiting - HDMI 1.4 4k modes bridge: - i2c fixes for sii902x" * tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm: (1403 commits) drm/amd/display: Add fast path for cursor plane updates drm/amdgpu: Enable GPU recovery by default for CI drm/amd/display: Fix duplicating scaling/underscan connector state drm/amd/display: Fix unintialized max_bpc state values Revert "drm/amd/display: Set RMX_ASPECT as default" drm/amdgpu: Fix stub function name drm/msm/dpu: Fix clock issue after bind failure drm/msm/dpu: Clean up dpu_media_info.h static inline functions drm/msm/dpu: Further cleanups for static inline functions drm/msm/dpu: Cleanup the debugfs functions drm/msm/dpu: Remove dpu_irq and unused functions drm/msm: Make irq_postinstall optional drm/msm/dpu: Cleanup callers of dpu_hw_blk_init drm/msm/dpu: Remove unused functions drm/msm/dpu: Remove dpu_crtc_is_enabled() drm/msm/dpu: Remove dpu_crtc_get_mixer_height drm/msm/dpu: Remove dpu_dbg drm/msm: dpu: Remove crtc_lock drm/msm: dpu: Remove vblank_requested flag from dpu_crtc drm/msm: dpu: Separate crtc assignment from vblank enable ...
This commit is contained in:
commit
4971f090aa
|
@ -1,11 +1,14 @@
|
|||
Device tree bindings for Allwinner A64 DE2 bus
|
||||
Device tree bindings for Allwinner DE2/3 bus
|
||||
|
||||
The Allwinner A64 DE2 is on a special bus, which needs a SRAM region (SRAM C)
|
||||
to be claimed for enabling the access.
|
||||
to be claimed for enabling the access. The DE3 on Allwinner H6 is at the same
|
||||
situation, and the binding also applies.
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible: Should contain "allwinner,sun50i-a64-de2"
|
||||
- compatible: Should be one of:
|
||||
- "allwinner,sun50i-a64-de2"
|
||||
- "allwinner,sun50i-h6-de3", "allwinner,sun50i-a64-de2"
|
||||
- reg: A resource specifier for the register space
|
||||
- #address-cells: Must be set to 1
|
||||
- #size-cells: Must be set to 1
|
||||
|
|
|
@ -67,6 +67,8 @@ Required properties:
|
|||
Optional properties:
|
||||
- power-domains: Optional phandle to associated power domain as described in
|
||||
the file ../power/power_domain.txt
|
||||
- amlogic,canvas: phandle to canvas provider node as described in the file
|
||||
../soc/amlogic/amlogic,canvas.txt
|
||||
|
||||
Required nodes:
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ Required properties:
|
|||
- "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders
|
||||
- "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders
|
||||
- "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders
|
||||
- "renesas,r8a77965-lvds" for R8A77965 (R-Car M3-N) compatible LVDS encoders
|
||||
- "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders
|
||||
- "renesas,r8a77980-lvds" for R8A77980 (R-Car V3H) compatible LVDS encoders
|
||||
- "renesas,r8a77990-lvds" for R8A77990 (R-Car E3) compatible LVDS encoders
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
Himax HX8357D display panels
|
||||
|
||||
This binding is for display panels using a Himax HX8357D controller in SPI
|
||||
mode, such as the Adafruit 3.5" TFT for Raspberry Pi.
|
||||
|
||||
Required properties:
|
||||
- compatible: "adafruit,yx350hv15", "himax,hx8357d"
|
||||
- dc-gpios: D/C pin
|
||||
- reg: address of the panel on the SPI bus
|
||||
|
||||
The node for this driver must be a child node of a SPI controller, hence
|
||||
all mandatory properties described in ../spi/spi-bus.txt must be specified.
|
||||
|
||||
Optional properties:
|
||||
- rotation: panel rotation in degrees counter clockwise (0,90,180,270)
|
||||
- backlight: phandle of the backlight device attached to the panel
|
||||
|
||||
Example:
|
||||
display@0{
|
||||
compatible = "adafruit,yx350hv15", "himax,hx8357d";
|
||||
reg = <0>;
|
||||
spi-max-frequency = <32000000>;
|
||||
dc-gpios = <&gpio0 25 GPIO_ACTIVE_HIGH>;
|
||||
rotation = <90>;
|
||||
backlight = <&backlight>;
|
||||
};
|
|
@ -106,6 +106,7 @@ Required properties:
|
|||
- clocks: Phandles to device clocks. See [1] for details on clock bindings.
|
||||
- clock-names: the following clocks are required:
|
||||
* "iface"
|
||||
* "ref" (only required for new DTS files/entries)
|
||||
For 28nm HPM/LP, 28nm 8960 PHYs:
|
||||
- vddio-supply: phandle to vdd-io regulator device node
|
||||
For 20nm PHY:
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
Qualcomm adreno/snapdragon GPU
|
||||
|
||||
Required properties:
|
||||
- compatible: "qcom,adreno-XYZ.W", "qcom,adreno"
|
||||
- compatible: "qcom,adreno-XYZ.W", "qcom,adreno" or
|
||||
"amd,imageon-XYZ.W", "amd,imageon"
|
||||
for example: "qcom,adreno-306.0", "qcom,adreno"
|
||||
Note that you need to list the less specific "qcom,adreno" (since this
|
||||
is what the device is matched on), in addition to the more specific
|
||||
with the chip-id.
|
||||
If "amd,imageon" is used, there should be no top level msm device.
|
||||
- reg: Physical base address and length of the controller's registers.
|
||||
- interrupts: The interrupt signal from the gpu.
|
||||
- clocks: device clocks
|
||||
|
|
|
@ -38,6 +38,8 @@ Required properties:
|
|||
Optional properties:
|
||||
- clock-names: the following clocks are optional:
|
||||
* "lut_clk"
|
||||
- qcom,lcdc-align-lsb: Boolean value indicating that LSB alignment should be
|
||||
used for LCDC. This is only valid for 18bpp panels.
|
||||
|
||||
Example:
|
||||
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
AU Optronics Corporation 10.1" (1280x800) color TFT LCD panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "auo,g101evn010"
|
||||
- power-supply: as specified in the base binding
|
||||
|
||||
Optional properties:
|
||||
- backlight: as specified in the base binding
|
||||
- enable-gpios: as specified in the base binding
|
||||
|
||||
This binding is compatible with the simple-panel binding, which is specified
|
||||
in simple-panel.txt in this directory.
|
|
@ -0,0 +1,12 @@
|
|||
Banana Pi 7" (S070WV20-CT16) TFT LCD Panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "bananapi,s070wv20-ct16"
|
||||
- power-supply: see ./panel-common.txt
|
||||
|
||||
Optional properties:
|
||||
- enable-gpios: see ./simple-panel.txt
|
||||
- backlight: see ./simple-panel.txt
|
||||
|
||||
This binding is compatible with the simple-panel binding, which is specified
|
||||
in ./simple-panel.txt.
|
|
@ -0,0 +1,12 @@
|
|||
CDTech(H.K.) Electronics Limited 4.3" 480x272 color TFT-LCD panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "cdtech,s043wq26h-ct7"
|
||||
- power-supply: as specified in the base binding
|
||||
|
||||
Optional properties:
|
||||
- backlight: as specified in the base binding
|
||||
- enable-gpios: as specified in the base binding
|
||||
|
||||
This binding is compatible with the simple-panel binding, which is specified
|
||||
in simple-panel.txt in this directory.
|
|
@ -0,0 +1,12 @@
|
|||
CDTech(H.K.) Electronics Limited 7" 800x480 color TFT-LCD panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "cdtech,s070wv95-ct16"
|
||||
- power-supply: as specified in the base binding
|
||||
|
||||
Optional properties:
|
||||
- backlight: as specified in the base binding
|
||||
- enable-gpios: as specified in the base binding
|
||||
|
||||
This binding is compatible with the simple-panel binding, which is specified
|
||||
in simple-panel.txt in this directory.
|
|
@ -0,0 +1,12 @@
|
|||
DLC Display Co. DLC1010GIG 10.1" WXGA TFT LCD Panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "dlc,dlc1010gig"
|
||||
- power-supply: See simple-panel.txt
|
||||
|
||||
Optional properties:
|
||||
- enable-gpios: See simple-panel.txt
|
||||
- backlight: See simple-panel.txt
|
||||
|
||||
This binding is compatible with the simple-panel binding, which is specified
|
||||
in simple-panel.txt in this directory.
|
|
@ -0,0 +1,42 @@
|
|||
Binding for Olimex Ltd. LCD-OLinuXino bridge panel.
|
||||
|
||||
This device can be used as bridge between a host controller and LCD panels.
|
||||
Currently supported LCDs are:
|
||||
- LCD-OLinuXino-4.3TS
|
||||
- LCD-OLinuXino-5
|
||||
- LCD-OLinuXino-7
|
||||
- LCD-OLinuXino-10
|
||||
|
||||
The panel itself contains:
|
||||
- AT24C16C EEPROM holding panel identification and timing requirements
|
||||
- AR1021 resistive touch screen controller (optional)
|
||||
- FT5x6 capacitive touch screnn controller (optional)
|
||||
- GT911/GT928 capacitive touch screen controller (optional)
|
||||
|
||||
The above chips share same I2C bus. The EEPROM is factory preprogrammed with
|
||||
device information (id, serial, etc.) and timing requirements.
|
||||
|
||||
Touchscreen bingings can be found in these files:
|
||||
- input/touchscreen/goodix.txt
|
||||
- input/touchscreen/edt-ft5x06.txt
|
||||
- input/touchscreen/ar1021.txt
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "olimex,lcd-olinuxino"
|
||||
- reg: address of the configuration EEPROM, should be <0x50>
|
||||
- power-supply: phandle of the regulator that provides the supply voltage
|
||||
|
||||
Optional properties:
|
||||
- enable-gpios: GPIO pin to enable or disable the panel
|
||||
- backlight: phandle of the backlight device attacked to the panel
|
||||
|
||||
Example:
|
||||
&i2c2 {
|
||||
panel@50 {
|
||||
compatible = "olimex,lcd-olinuxino";
|
||||
reg = <0x50>;
|
||||
power-supply = <®_vcc5v0>;
|
||||
enable-gpios = <&pio 7 8 GPIO_ACTIVE_HIGH>;
|
||||
backlight = <&backlight>;
|
||||
};
|
||||
};
|
|
@ -0,0 +1,30 @@
|
|||
Samsung S6D16D0 4" 864x480 AMOLED panel
|
||||
|
||||
Required properties:
|
||||
- compatible: should be:
|
||||
"samsung,s6d16d0",
|
||||
- reg: the virtual channel number of a DSI peripheral
|
||||
- vdd1-supply: I/O voltage supply
|
||||
- reset-gpios: a GPIO spec for the reset pin (active low)
|
||||
|
||||
The device node can contain one 'port' child node with one child
|
||||
'endpoint' node, according to the bindings defined in
|
||||
media/video-interfaces.txt. This node should describe panel's video bus.
|
||||
|
||||
Example:
|
||||
&dsi {
|
||||
...
|
||||
|
||||
panel@0 {
|
||||
compatible = "samsung,s6d16d0";
|
||||
reg = <0>;
|
||||
vdd1-supply = <&foo>;
|
||||
reset-gpios = <&foo_gpio 0 GPIO_ACTIVE_LOW>;
|
||||
|
||||
port {
|
||||
panel_in: endpoint {
|
||||
remote-endpoint = <&dsi_out>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
|
@ -1,47 +1,70 @@
|
|||
TPO TPG110 Panel
|
||||
================
|
||||
|
||||
This binding builds on the DPI bindings, adding a few properties
|
||||
as a superset of a DPI. See panel-dpi.txt for the required DPI
|
||||
bindings.
|
||||
This panel driver is a component that acts as an intermediary
|
||||
between an RGB output and a variety of panels. The panel
|
||||
driver is strapped up in electronics to the desired resolution
|
||||
and other properties, and has a control interface over 3WIRE
|
||||
SPI. By talking to the TPG110 over SPI, the strapped properties
|
||||
can be discovered and the hardware is therefore mostly
|
||||
self-describing.
|
||||
|
||||
+--------+
|
||||
SPI -> | TPO | -> physical display
|
||||
RGB -> | TPG110 |
|
||||
+--------+
|
||||
|
||||
If some electrical strap or alternate resolution is desired,
|
||||
this can be set up by taking software control of the display
|
||||
over the SPI interface. The interface can also adjust
|
||||
for properties of the display such as gamma correction and
|
||||
certain electrical driving levels.
|
||||
|
||||
The TPG110 does not know the physical dimensions of the panel
|
||||
connected, so this needs to be specified in the device tree.
|
||||
|
||||
It requires a GPIO line for control of its reset line.
|
||||
|
||||
The serial protocol has line names that resemble I2C but the
|
||||
protocol is not I2C but 3WIRE SPI.
|
||||
|
||||
Required properties:
|
||||
- compatible : "tpo,tpg110"
|
||||
- compatible : one of:
|
||||
"ste,nomadik-nhk15-display", "tpo,tpg110"
|
||||
"tpo,tpg110"
|
||||
- grestb-gpios : panel reset GPIO
|
||||
- scen-gpios : serial control enable GPIO
|
||||
- scl-gpios : serial control clock line GPIO
|
||||
- sda-gpios : serial control data line GPIO
|
||||
- width-mm : see display/panel/panel-common.txt
|
||||
- height-mm : see display/panel/panel-common.txt
|
||||
|
||||
Required nodes:
|
||||
- Video port for DPI input, see panel-dpi.txt
|
||||
- Panel timing for DPI setup, see panel-dpi.txt
|
||||
The device needs to be a child of an SPI bus, see
|
||||
spi/spi-bus.txt. The SPI child must set the following
|
||||
properties:
|
||||
- spi-3wire
|
||||
- spi-max-frequency = <3000000>;
|
||||
as these are characteristics of this device.
|
||||
|
||||
The device node can contain one 'port' child node with one child
|
||||
'endpoint' node, according to the bindings defined in
|
||||
media/video-interfaces.txt. This node should describe panel's video bus.
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
panel {
|
||||
compatible = "tpo,tpg110", "panel-dpi";
|
||||
grestb-gpios = <&stmpe_gpio44 5 GPIO_ACTIVE_LOW>;
|
||||
scen-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
|
||||
scl-gpios = <&gpio0 5 GPIO_ACTIVE_HIGH>;
|
||||
sda-gpios = <&gpio0 4 GPIO_ACTIVE_HIGH>;
|
||||
panel: display@0 {
|
||||
compatible = "tpo,tpg110";
|
||||
reg = <0>;
|
||||
spi-3wire;
|
||||
/* 320 ns min period ~= 3 MHz */
|
||||
spi-max-frequency = <3000000>;
|
||||
/* Width and height from data sheet */
|
||||
width-mm = <116>;
|
||||
height-mm = <87>;
|
||||
grestb-gpios = <&foo_gpio 5 GPIO_ACTIVE_LOW>;
|
||||
backlight = <&bl>;
|
||||
|
||||
port {
|
||||
nomadik_clcd_panel: endpoint {
|
||||
remote-endpoint = <&nomadik_clcd_pads>;
|
||||
remote-endpoint = <&foo>;
|
||||
};
|
||||
};
|
||||
|
||||
panel-timing {
|
||||
clock-frequency = <33200000>;
|
||||
hactive = <800>;
|
||||
hback-porch = <216>;
|
||||
hfront-porch = <40>;
|
||||
hsync-len = <1>;
|
||||
vactive = <480>;
|
||||
vback-porch = <35>;
|
||||
vfront-porch = <10>;
|
||||
vsync-len = <1>;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -4,7 +4,9 @@ Required Properties:
|
|||
|
||||
- compatible: must be one of the following.
|
||||
- "renesas,du-r8a7743" for R8A7743 (RZ/G1M) compatible DU
|
||||
- "renesas,du-r8a7744" for R8A7744 (RZ/G1N) compatible DU
|
||||
- "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
|
||||
- "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
|
||||
- "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
|
||||
- "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
|
||||
- "renesas,du-r8a7791" for R8A7791 (R-Car M2-W) compatible DU
|
||||
|
@ -52,7 +54,9 @@ corresponding to each DU output.
|
|||
Port0 Port1 Port2 Port3
|
||||
-----------------------------------------------------------------------------
|
||||
R8A7743 (RZ/G1M) DPAD 0 LVDS 0 - -
|
||||
R8A7744 (RZ/G1N) DPAD 0 LVDS 0 - -
|
||||
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
|
||||
R8A77470 (RZ/G1C) DPAD 0 DPAD 1 LVDS 0 -
|
||||
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
|
||||
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -
|
||||
R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - -
|
||||
|
|
|
@ -13,6 +13,7 @@ Required properties:
|
|||
|
||||
- compatible: should be one of the following:
|
||||
"rockchip,rk3288-dw-hdmi"
|
||||
"rockchip,rk3328-dw-hdmi"
|
||||
"rockchip,rk3399-dw-hdmi"
|
||||
- reg: See dw_hdmi.txt.
|
||||
- reg-io-width: See dw_hdmi.txt. Shall be 4.
|
||||
|
@ -34,6 +35,8 @@ Optional properties
|
|||
- clock-names: May contain "cec" as defined in dw_hdmi.txt.
|
||||
- clock-names: May contain "grf", power for grf io.
|
||||
- clock-names: May contain "vpll", external clock for some hdmi phy.
|
||||
- phys: from general PHY binding: the phandle for the PHY device.
|
||||
- phy-names: Should be "hdmi" if phys references an external phy.
|
||||
|
||||
Example:
|
||||
|
||||
|
|
|
@ -79,6 +79,7 @@ Required properties:
|
|||
- compatible: value must be one of:
|
||||
* "allwinner,sun8i-a83t-dw-hdmi"
|
||||
* "allwinner,sun50i-a64-dw-hdmi", "allwinner,sun8i-a83t-dw-hdmi"
|
||||
* "allwinner,sun50i-h6-dw-hdmi"
|
||||
- reg: base address and size of memory-mapped region
|
||||
- reg-io-width: See dw_hdmi.txt. Shall be 1.
|
||||
- interrupts: HDMI interrupt number
|
||||
|
@ -86,9 +87,14 @@ Required properties:
|
|||
* iahb: the HDMI bus clock
|
||||
* isfr: the HDMI register clock
|
||||
* tmds: TMDS clock
|
||||
* cec: HDMI CEC clock (H6 only)
|
||||
* hdcp: HDCP clock (H6 only)
|
||||
* hdcp-bus: HDCP bus clock (H6 only)
|
||||
- clock-names: the clock names mentioned above
|
||||
- resets: phandle to the reset controller
|
||||
- reset-names: must be "ctrl"
|
||||
- resets:
|
||||
* ctrl: HDMI controller reset
|
||||
* hdcp: HDCP reset (H6 only)
|
||||
- reset-names: reset names mentioned above
|
||||
- phys: phandle to the DWC HDMI PHY
|
||||
- phy-names: must be "phy"
|
||||
|
||||
|
@ -109,6 +115,7 @@ Required properties:
|
|||
* allwinner,sun8i-h3-hdmi-phy
|
||||
* allwinner,sun8i-r40-hdmi-phy
|
||||
* allwinner,sun50i-a64-hdmi-phy
|
||||
* allwinner,sun50i-h6-hdmi-phy
|
||||
- reg: base address and size of memory-mapped region
|
||||
- clocks: phandles to the clocks feeding the HDMI PHY
|
||||
* bus: the HDMI PHY interface clock
|
||||
|
@ -158,6 +165,7 @@ Required properties:
|
|||
* allwinner,sun9i-a80-tcon-tv
|
||||
* "allwinner,sun50i-a64-tcon-lcd", "allwinner,sun8i-a83t-tcon-lcd"
|
||||
* "allwinner,sun50i-a64-tcon-tv", "allwinner,sun8i-a83t-tcon-tv"
|
||||
* allwinner,sun50i-h6-tcon-tv, allwinner,sun8i-r40-tcon-tv
|
||||
- reg: base address and size of memory-mapped region
|
||||
- interrupts: interrupt associated to this IP
|
||||
- clocks: phandles to the clocks feeding the TCON.
|
||||
|
@ -220,24 +228,26 @@ It allows display pipeline to be configured in very different ways:
|
|||
\ [3] TCON-TV1 [1] - TVE1/RGB
|
||||
|
||||
Note that both TCON TOP references same physical unit. Both mixers can be
|
||||
connected to any TCON.
|
||||
connected to any TCON. Not all TCON TOP variants support all features.
|
||||
|
||||
Required properties:
|
||||
- compatible: value must be one of:
|
||||
* allwinner,sun8i-r40-tcon-top
|
||||
* allwinner,sun50i-h6-tcon-top
|
||||
- reg: base address and size of the memory-mapped region.
|
||||
- clocks: phandle to the clocks feeding the TCON TOP
|
||||
* bus: TCON TOP interface clock
|
||||
* tcon-tv0: TCON TV0 clock
|
||||
* tve0: TVE0 clock
|
||||
* tcon-tv1: TCON TV1 clock
|
||||
* tve1: TVE0 clock
|
||||
* dsi: MIPI DSI clock
|
||||
* tve0: TVE0 clock (R40 only)
|
||||
* tcon-tv1: TCON TV1 clock (R40 only)
|
||||
* tve1: TVE0 clock (R40 only)
|
||||
* dsi: MIPI DSI clock (R40 only)
|
||||
- clock-names: clock name mentioned above
|
||||
- resets: phandle to the reset line driving the TCON TOP
|
||||
- #clock-cells : must contain 1
|
||||
- clock-output-names: Names of clocks created for TCON TV0 channel clock,
|
||||
TCON TV1 channel clock and DSI channel clock, in that order.
|
||||
TCON TV1 channel clock (R40 only) and DSI channel clock (R40 only), in
|
||||
that order.
|
||||
|
||||
- ports: A ports node with endpoint definitions as defined in
|
||||
Documentation/devicetree/bindings/media/video-interfaces.txt. 6 ports should
|
||||
|
@ -381,6 +391,7 @@ Required properties:
|
|||
* allwinner,sun8i-v3s-de2-mixer
|
||||
* allwinner,sun50i-a64-de2-mixer-0
|
||||
* allwinner,sun50i-a64-de2-mixer-1
|
||||
* allwinner,sun50i-h6-de3-mixer-0
|
||||
- reg: base address and size of the memory-mapped region.
|
||||
- clocks: phandles to the clocks feeding the mixer
|
||||
* bus: the mixer interface clock
|
||||
|
@ -415,9 +426,10 @@ Required properties:
|
|||
* allwinner,sun8i-v3s-display-engine
|
||||
* allwinner,sun9i-a80-display-engine
|
||||
* allwinner,sun50i-a64-display-engine
|
||||
* allwinner,sun50i-h6-display-engine
|
||||
|
||||
- allwinner,pipelines: list of phandle to the display engine
|
||||
frontends (DE 1.0) or mixers (DE 2.0) available.
|
||||
frontends (DE 1.0) or mixers (DE 2.0/3.0) available.
|
||||
|
||||
Example:
|
||||
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
Truly model NT35597 DSI display driver
|
||||
|
||||
The Truly NT35597 is a generic display driver, currently only configured
|
||||
for use in the 2K display on the Qualcomm SDM845 MTP board.
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "truly,nt35597-2K-display"
|
||||
- vdda-supply: phandle of the regulator that provides the supply voltage
|
||||
Power IC supply
|
||||
- vdispp-supply: phandle of the regulator that provides the supply voltage
|
||||
for positive LCD bias
|
||||
- vdispn-supply: phandle of the regulator that provides the supply voltage
|
||||
for negative LCD bias
|
||||
- reset-gpios: phandle of gpio for reset line
|
||||
This should be 8mA, gpio can be configured using mux, pinctrl, pinctrl-names
|
||||
(active low)
|
||||
- mode-gpios: phandle of the gpio for choosing the mode of the display
|
||||
for single DSI or Dual DSI
|
||||
This should be low for dual DSI and high for single DSI mode
|
||||
- ports: This device has two video ports driven by two DSIs. Their connections
|
||||
are modeled using the OF graph bindings specified in
|
||||
Documentation/devicetree/bindings/graph.txt.
|
||||
- port@0: DSI input port driven by master DSI
|
||||
- port@1: DSI input port driven by secondary DSI
|
||||
|
||||
Example:
|
||||
|
||||
dsi@ae94000 {
|
||||
panel@0 {
|
||||
compatible = "truly,nt35597-2K-display";
|
||||
reg = <0>;
|
||||
vdda-supply = <&pm8998_l14>;
|
||||
vdispp-supply = <&lab_regulator>;
|
||||
vdispn-supply = <&ibb_regulator>;
|
||||
pinctrl-names = "default", "suspend";
|
||||
pinctrl-0 = <&dpu_dsi_active>;
|
||||
pinctrl-1 = <&dpu_dsi_suspend>;
|
||||
|
||||
reset-gpios = <&tlmm 6 GPIO_ACTIVE_LOW>;
|
||||
mode-gpios = <&tlmm 52 GPIO_ACTIVE_HIGH>;
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
panel0_in: endpoint {
|
||||
remote-endpoint = <&dsi0_out>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
panel1_in: endpoint {
|
||||
remote-endpoint = <&dsi1_out>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
|
@ -67,6 +67,7 @@ capella Capella Microsystems, Inc
|
|||
cascoda Cascoda, Ltd.
|
||||
cavium Cavium, Inc.
|
||||
cdns Cadence Design Systems Inc.
|
||||
cdtech CDTech(H.K.) Electronics Limited
|
||||
ceva Ceva, Inc.
|
||||
chipidea Chipidea, Inc
|
||||
chipone ChipOne
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
===================================
|
||||
drm/amd/display - Display Core (DC)
|
||||
===================================
|
||||
|
||||
*placeholder - general description of supported platforms, what dc is, etc.*
|
||||
|
||||
Because it is partially shared with other operating systems, the Display Core
|
||||
Driver is divided in two pieces.
|
||||
|
||||
1. **Display Core (DC)** contains the OS-agnostic components. Things like
|
||||
hardware programming and resource management are handled here.
|
||||
2. **Display Manager (DM)** contains the OS-dependent components. Hooks to the
|
||||
amdgpu base driver and DRM are implemented here.
|
||||
|
||||
It doesn't help that the entire package is frequently referred to as DC. But
|
||||
with the context in mind, it should be clear.
|
||||
|
||||
When CONFIG_DRM_AMD_DC is enabled, DC will be initialized by default for
|
||||
supported ASICs. To force disable, set `amdgpu.dc=0` on kernel command line.
|
||||
Likewise, to force enable on unsupported ASICs, set `amdgpu.dc=1`.
|
||||
|
||||
To determine if DC is loaded, search dmesg for the following entry:
|
||||
|
||||
``Display Core initialized with <version number here>``
|
||||
|
||||
AMDgpu Display Manager
|
||||
======================
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:doc: overview
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
|
||||
:internal:
|
||||
|
||||
Lifecycle
|
||||
---------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:doc: DM Lifecycle
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:functions: dm_hw_init dm_hw_fini
|
||||
|
||||
Interrupts
|
||||
----------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
|
||||
:doc: overview
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:functions: register_hpd_handlers dm_crtc_high_irq dm_pflip_high_irq
|
||||
|
||||
Atomic Implementation
|
||||
---------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:doc: atomic
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
|
||||
:functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail
|
||||
|
||||
Display Core
|
||||
============
|
||||
|
||||
**WIP**
|
|
@ -5,6 +5,7 @@ GPU Driver Documentation
|
|||
.. toctree::
|
||||
|
||||
amdgpu
|
||||
amdgpu-dc
|
||||
i915
|
||||
meson
|
||||
pl111
|
||||
|
|
|
@ -59,12 +59,6 @@ Implementing Asynchronous Atomic Commit
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
|
||||
:doc: implementing nonblocking commit
|
||||
|
||||
Atomic State Reset and Initialization
|
||||
-------------------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
|
||||
:doc: atomic state reset and initialization
|
||||
|
||||
Helper Functions Reference
|
||||
--------------------------
|
||||
|
||||
|
@ -74,6 +68,21 @@ Helper Functions Reference
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
|
||||
:export:
|
||||
|
||||
Atomic State Reset and Initialization
|
||||
-------------------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_state_helper.c
|
||||
:doc: atomic state reset and initialization
|
||||
|
||||
Atomic State Helper Reference
|
||||
-----------------------------
|
||||
|
||||
.. kernel-doc:: include/drm/drm_atomic_state_helper.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_state_helper.c
|
||||
:export:
|
||||
|
||||
Simple KMS Helper Reference
|
||||
===========================
|
||||
|
||||
|
@ -223,6 +232,18 @@ MIPI DSI Helper Functions Reference
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_mipi_dsi.c
|
||||
:export:
|
||||
|
||||
Display Stream Compression Helper Functions Reference
|
||||
=====================================================
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_dsc.c
|
||||
:doc: dsc helpers
|
||||
|
||||
.. kernel-doc:: include/drm/drm_dsc.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_dsc.c
|
||||
:export:
|
||||
|
||||
Output Probing Helper Functions Reference
|
||||
=========================================
|
||||
|
||||
|
|
|
@ -554,6 +554,18 @@ Plane Composition Properties
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_blend.c
|
||||
:export:
|
||||
|
||||
FB_DAMAGE_CLIPS
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_damage_helper.c
|
||||
:doc: overview
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_damage_helper.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: include/drm/drm_damage_helper.h
|
||||
:internal:
|
||||
|
||||
Color Management Properties
|
||||
---------------------------
|
||||
|
||||
|
@ -575,6 +587,13 @@ Explicit Fencing Properties
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
|
||||
:doc: explicit fencing properties
|
||||
|
||||
|
||||
Variable Refresh Properties
|
||||
---------------------------
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
|
||||
:doc: Variable refresh properties
|
||||
|
||||
Existing KMS Properties
|
||||
-----------------------
|
||||
|
||||
|
|
|
@ -72,16 +72,13 @@ object TTM to provide a pool for buffer object allocation by clients and
|
|||
the kernel itself. The type of this object should be
|
||||
TTM_GLOBAL_TTM_BO, and its size should be sizeof(struct
|
||||
ttm_bo_global). Again, driver-specific init and release functions may
|
||||
be provided, likely eventually calling ttm_bo_global_init() and
|
||||
ttm_bo_global_release(), respectively. Also, like the previous
|
||||
be provided, likely eventually calling ttm_bo_global_ref_init() and
|
||||
ttm_bo_global_ref_release(), respectively. Also, like the previous
|
||||
object, ttm_global_item_ref() is used to create an initial reference
|
||||
count for the TTM, which will call your initialization function.
|
||||
|
||||
See the radeon_ttm.c file for an example of usage.
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_global.c
|
||||
:export:
|
||||
|
||||
|
||||
The Graphics Execution Manager (GEM)
|
||||
====================================
|
||||
|
|
|
@ -197,6 +197,9 @@ EPERM/EACCESS:
|
|||
difference between EACCESS and EPERM.
|
||||
|
||||
ENODEV:
|
||||
The device is not (yet) present or fully initialized.
|
||||
|
||||
EOPNOTSUPP:
|
||||
Feature (like PRIME, modesetting, GEM) is not supported by the driver.
|
||||
|
||||
ENXIO:
|
||||
|
|
|
@ -28,22 +28,16 @@ them, but also all the virtual ones used by KVM, so everyone qualifies).
|
|||
|
||||
Contact: Daniel Vetter, Thierry Reding, respective driver maintainers
|
||||
|
||||
Switch from reference/unreference to get/put
|
||||
--------------------------------------------
|
||||
|
||||
For some reason DRM core uses ``reference``/``unreference`` suffixes for
|
||||
refcounting functions, but kernel uses ``get``/``put`` (e.g.
|
||||
``kref_get``/``put()``). It would be good to switch over for consistency, and
|
||||
it's shorter. Needs to be done in 3 steps for each pair of functions:
|
||||
Remove custom dumb_map_offset implementations
|
||||
---------------------------------------------
|
||||
|
||||
* Create new ``get``/``put`` functions, define the old names as compatibility
|
||||
wrappers
|
||||
* Switch over each file/driver using a cocci-generated spatch.
|
||||
* Once all users of the old names are gone, remove them.
|
||||
All GEM based drivers should be using drm_gem_create_mmap_offset() instead.
|
||||
Audit each individual driver, make sure it'll work with the generic
|
||||
implementation (there's lots of outdated locking leftovers in various
|
||||
implementations), and then remove it.
|
||||
|
||||
This way drivers/patches in the progress of getting merged won't break.
|
||||
|
||||
Contact: Daniel Vetter
|
||||
Contact: Daniel Vetter, respective driver maintainers
|
||||
|
||||
Convert existing KMS drivers to atomic modesetting
|
||||
--------------------------------------------------
|
||||
|
@ -234,6 +228,34 @@ efficient.
|
|||
|
||||
Contact: Daniel Vetter
|
||||
|
||||
Defaults for .gem_prime_import and export
|
||||
-----------------------------------------
|
||||
|
||||
Most drivers don't need to set drm_driver->gem_prime_import and
|
||||
->gem_prime_export now that drm_gem_prime_import() and drm_gem_prime_export()
|
||||
are the default.
|
||||
|
||||
struct drm_gem_object_funcs
|
||||
---------------------------
|
||||
|
||||
GEM objects can now have a function table instead of having the callbacks on the
|
||||
DRM driver struct. This is now the preferred way and drivers can be moved over.
|
||||
|
||||
Use DRM_MODESET_LOCK_ALL_* helpers instead of boilerplate
|
||||
---------------------------------------------------------
|
||||
|
||||
For cases where drivers are attempting to grab the modeset locks with a local
|
||||
acquire context. Replace the boilerplate code surrounding
|
||||
drm_modeset_lock_all_ctx() with DRM_MODESET_LOCK_ALL_BEGIN() and
|
||||
DRM_MODESET_LOCK_ALL_END() instead.
|
||||
|
||||
This should also be done for all places where drm_modest_lock_all() is still
|
||||
used.
|
||||
|
||||
As a reference, take a look at the conversions already completed in drm core.
|
||||
|
||||
Contact: Sean Paul, respective driver maintainers
|
||||
|
||||
Core refactorings
|
||||
=================
|
||||
|
||||
|
@ -339,6 +361,16 @@ Some of these date from the very introduction of KMS in 2008 ...
|
|||
leftovers from older (never merged into upstream) KMS designs where modes
|
||||
where set using their ID, including support to add/remove modes.
|
||||
|
||||
- Make ->funcs and ->helper_private vtables optional. There's a bunch of empty
|
||||
function tables in drivers, but before we can remove them we need to make sure
|
||||
that all the users in helpers and drivers do correctly check for a NULL
|
||||
vtable.
|
||||
|
||||
- Cleanup up the various ->destroy callbacks. A lot of them just wrapt the
|
||||
drm_*_cleanup implementations and can be removed. Some tack a kfree() at the
|
||||
end, for which we could add drm_*_cleanup_kfree(). And then there's the (for
|
||||
historical reasons) misnamed drm_primary_helper_destroy() function.
|
||||
|
||||
Better Testing
|
||||
==============
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
TODO
|
||||
====
|
||||
|
||||
CRC API
|
||||
-------
|
||||
CRC API Improvements
|
||||
--------------------
|
||||
|
||||
- Optimize CRC computation ``compute_crc()`` and plane blending ``blend()``
|
||||
|
||||
|
@ -22,3 +22,100 @@ CRC API
|
|||
|
||||
- Add igt test to check extreme alpha values i.e. fully opaque and fully
|
||||
transparent (intermediate values are affected by hw-specific rounding modes).
|
||||
|
||||
Vblank issues
|
||||
-------------
|
||||
|
||||
Some IGT test cases are failing. Need to analyze why and fix the issues:
|
||||
|
||||
- plain-flip-fb-recreate
|
||||
- plain-flip-ts-check
|
||||
- flip-vs-blocking-wf-vblank
|
||||
- plain-flip-fb-recreate-interruptible
|
||||
- flip-vs-wf_vblank-interruptible
|
||||
|
||||
Runtime Configuration
|
||||
---------------------
|
||||
|
||||
We want to be able to reconfigure vkms instance without having to reload the
|
||||
module. Use/Test-cases:
|
||||
|
||||
- Hotplug/hotremove connectors on the fly (to be able to test DP MST handling of
|
||||
compositors).
|
||||
|
||||
- Configure planes/crtcs/connectors (we'd need some code to have more than 1 of
|
||||
them first).
|
||||
|
||||
- Change output configuration: Plug/unplug screens, change EDID, allow changing
|
||||
the refresh rate.
|
||||
|
||||
The currently proposed solution is to expose vkms configuration through
|
||||
configfs. All existing module options should be supported through configfs too.
|
||||
|
||||
Add Plane Features
|
||||
------------------
|
||||
|
||||
There's lots of plane features we could add support for:
|
||||
|
||||
- Real overlay planes, not just cursor.
|
||||
|
||||
- Full alpha blending on all planes.
|
||||
|
||||
- Rotation, scaling.
|
||||
|
||||
- Additional buffer formats, especially YUV formats for video like NV12.
|
||||
Low/high bpp RGB formats would also be interesting.
|
||||
|
||||
- Async updates (currently only possible on cursor plane using the legacy cursor
|
||||
api).
|
||||
|
||||
For all of these, we also want to review the igt test coverage and make sure all
|
||||
relevant igt testcases work on vkms.
|
||||
|
||||
Writeback support
|
||||
-----------------
|
||||
|
||||
Currently vkms only computes a CRC for each frame. Once we have additional plane
|
||||
features, we could write back the entire composited frame, and expose it as:
|
||||
|
||||
- Writeback connector. This is useful for testing compositors if you don't have
|
||||
hardware with writeback support.
|
||||
|
||||
- As a v4l device. This is useful for debugging compositors on special vkms
|
||||
configurations, so that developers see what's really going on.
|
||||
|
||||
Prime Buffer Sharing
|
||||
--------------------
|
||||
|
||||
We already have vgem, which is a gem driver for testing rendering, similar to
|
||||
how vkms is for testing the modeset side. Adding buffer sharing support to vkms
|
||||
allows us to test them together, to test synchronization and lots of other
|
||||
features. Also, this allows compositors to test whether they work correctly on
|
||||
SoC chips, where the display and rendering is very often split between 2
|
||||
drivers.
|
||||
|
||||
Output Features
|
||||
---------------
|
||||
|
||||
- Variable refresh rate/freesync support. This probably needs prime buffer
|
||||
sharing support, so that we can use vgem fences to simulate rendering in
|
||||
testing. Also needs support to specify the EDID.
|
||||
|
||||
- Add support for link status, so that compositors can validate their runtime
|
||||
fallbacks when e.g. a Display Port link goes bad.
|
||||
|
||||
- All the hotplug handling describe under "Runtime Configuration".
|
||||
|
||||
Atomic Check using eBPF
|
||||
-----------------------
|
||||
|
||||
Atomic drivers have lots of restrictions which are not exposed to userspace in
|
||||
any explicit form through e.g. possible property values. Userspace can only
|
||||
inquiry about these limits through the atomic IOCTL, possibly using the
|
||||
TEST_ONLY flag. Trying to add configurable code for all these limits, to allow
|
||||
compositors to be tested against them, would be rather futile exercise. Instead
|
||||
we could add support for eBPF to validate any kind of atomic state, and
|
||||
implement a library of different restrictions.
|
||||
|
||||
This needs a bunch of features (plane compositing, multiple outputs, ...)
|
||||
enabled already to make sense.
|
||||
|
|
|
@ -143,7 +143,7 @@ using a number of wrapper functions:
|
|||
Query the address space, and return true if it is completely
|
||||
unevictable.
|
||||
|
||||
These are currently used in two places in the kernel:
|
||||
These are currently used in three places in the kernel:
|
||||
|
||||
(1) By ramfs to mark the address spaces of its inodes when they are created,
|
||||
and this mark remains for the life of the inode.
|
||||
|
@ -154,6 +154,10 @@ These are currently used in two places in the kernel:
|
|||
swapped out; the application must touch the pages manually if it wants to
|
||||
ensure they're in memory.
|
||||
|
||||
(3) By the i915 driver to mark pinned address space until it's unpinned. The
|
||||
amount of unevictable memory marked by i915 driver is roughly the bounded
|
||||
object size in debugfs/dri/0/i915_gem_objects.
|
||||
|
||||
|
||||
Detecting Unevictable Pages
|
||||
---------------------------
|
||||
|
|
15
MAINTAINERS
15
MAINTAINERS
|
@ -4737,6 +4737,13 @@ S: Maintained
|
|||
F: drivers/gpu/drm/tinydrm/ili9225.c
|
||||
F: Documentation/devicetree/bindings/display/ilitek,ili9225.txt
|
||||
|
||||
DRM DRIVER FOR HX8357D PANELS
|
||||
M: Eric Anholt <eric@anholt.net>
|
||||
T: git git://anongit.freedesktop.org/drm/drm-misc
|
||||
S: Maintained
|
||||
F: drivers/gpu/drm/tinydrm/hx8357d.c
|
||||
F: Documentation/devicetree/bindings/display/himax,hx8357d.txt
|
||||
|
||||
DRM DRIVER FOR INTEL I810 VIDEO CARDS
|
||||
S: Orphan / Obsolete
|
||||
F: drivers/gpu/drm/i810/
|
||||
|
@ -4778,6 +4785,12 @@ S: Supported
|
|||
F: drivers/gpu/drm/nouveau/
|
||||
F: include/uapi/drm/nouveau_drm.h
|
||||
|
||||
DRM DRIVER FOR OLIMEX LCD-OLINUXINO PANELS
|
||||
M: Stefan Mavrodiev <stefan@olimex.com>
|
||||
S: Maintained
|
||||
F: drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c
|
||||
F: Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.txt
|
||||
|
||||
DRM DRIVER FOR PERVASIVE DISPLAYS REPAPER PANELS
|
||||
M: Noralf Trønnes <noralf@tronnes.org>
|
||||
S: Maintained
|
||||
|
@ -4843,10 +4856,8 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
|
|||
|
||||
DRM DRIVER FOR VMWARE VIRTUAL GPU
|
||||
M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
|
||||
M: Sinclair Yeh <syeh@vmware.com>
|
||||
M: Thomas Hellstrom <thellstrom@vmware.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
T: git git://people.freedesktop.org/~syeh/repos_linux
|
||||
T: git git://people.freedesktop.org/~thomash/linux
|
||||
S: Supported
|
||||
F: drivers/gpu/drm/vmwgfx/
|
||||
|
|
|
@ -30,13 +30,16 @@
|
|||
EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit);
|
||||
EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
|
||||
|
||||
static DEFINE_SPINLOCK(dma_fence_stub_lock);
|
||||
static struct dma_fence dma_fence_stub;
|
||||
|
||||
/*
|
||||
* fence context counter: each execution context should have its own
|
||||
* fence context, this allows checking if fences belong to the same
|
||||
* context or not. One device can have multiple separate contexts,
|
||||
* and they're used if some engine can run independently of another.
|
||||
*/
|
||||
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
|
||||
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1);
|
||||
|
||||
/**
|
||||
* DOC: DMA fences overview
|
||||
|
@ -68,6 +71,37 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
|
|||
* &dma_buf.resv pointer.
|
||||
*/
|
||||
|
||||
static const char *dma_fence_stub_get_name(struct dma_fence *fence)
|
||||
{
|
||||
return "stub";
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops dma_fence_stub_ops = {
|
||||
.get_driver_name = dma_fence_stub_get_name,
|
||||
.get_timeline_name = dma_fence_stub_get_name,
|
||||
};
|
||||
|
||||
/**
|
||||
* dma_fence_get_stub - return a signaled fence
|
||||
*
|
||||
* Return a stub fence which is already signaled.
|
||||
*/
|
||||
struct dma_fence *dma_fence_get_stub(void)
|
||||
{
|
||||
spin_lock(&dma_fence_stub_lock);
|
||||
if (!dma_fence_stub.ops) {
|
||||
dma_fence_init(&dma_fence_stub,
|
||||
&dma_fence_stub_ops,
|
||||
&dma_fence_stub_lock,
|
||||
0, 0);
|
||||
dma_fence_signal_locked(&dma_fence_stub);
|
||||
}
|
||||
spin_unlock(&dma_fence_stub_lock);
|
||||
|
||||
return dma_fence_get(&dma_fence_stub);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_fence_get_stub);
|
||||
|
||||
/**
|
||||
* dma_fence_context_alloc - allocate an array of fence contexts
|
||||
* @num: amount of contexts to allocate
|
||||
|
|
|
@ -56,9 +56,10 @@ const char reservation_seqcount_string[] = "reservation_seqcount";
|
|||
EXPORT_SYMBOL(reservation_seqcount_string);
|
||||
|
||||
/**
|
||||
* reservation_object_reserve_shared - Reserve space to add a shared
|
||||
* fence to a reservation_object.
|
||||
* reservation_object_reserve_shared - Reserve space to add shared fences to
|
||||
* a reservation_object.
|
||||
* @obj: reservation object
|
||||
* @num_fences: number of fences we want to add
|
||||
*
|
||||
* Should be called before reservation_object_add_shared_fence(). Must
|
||||
* be called with obj->lock held.
|
||||
|
@ -66,107 +67,27 @@ EXPORT_SYMBOL(reservation_seqcount_string);
|
|||
* RETURNS
|
||||
* Zero for success, or -errno
|
||||
*/
|
||||
int reservation_object_reserve_shared(struct reservation_object *obj)
|
||||
int reservation_object_reserve_shared(struct reservation_object *obj,
|
||||
unsigned int num_fences)
|
||||
{
|
||||
struct reservation_object_list *fobj, *old;
|
||||
u32 max;
|
||||
struct reservation_object_list *old, *new;
|
||||
unsigned int i, j, k, max;
|
||||
|
||||
old = reservation_object_get_list(obj);
|
||||
|
||||
if (old && old->shared_max) {
|
||||
if (old->shared_count < old->shared_max) {
|
||||
/* perform an in-place update */
|
||||
kfree(obj->staged);
|
||||
obj->staged = NULL;
|
||||
if ((old->shared_count + num_fences) <= old->shared_max)
|
||||
return 0;
|
||||
} else
|
||||
max = old->shared_max * 2;
|
||||
} else
|
||||
max = 4;
|
||||
|
||||
/*
|
||||
* resize obj->staged or allocate if it doesn't exist,
|
||||
* noop if already correct size
|
||||
*/
|
||||
fobj = krealloc(obj->staged, offsetof(typeof(*fobj), shared[max]),
|
||||
GFP_KERNEL);
|
||||
if (!fobj)
|
||||
return -ENOMEM;
|
||||
|
||||
obj->staged = fobj;
|
||||
fobj->shared_max = max;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reservation_object_reserve_shared);
|
||||
|
||||
static void
|
||||
reservation_object_add_shared_inplace(struct reservation_object *obj,
|
||||
struct reservation_object_list *fobj,
|
||||
struct dma_fence *fence)
|
||||
{
|
||||
struct dma_fence *signaled = NULL;
|
||||
u32 i, signaled_idx;
|
||||
|
||||
dma_fence_get(fence);
|
||||
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&obj->seq);
|
||||
|
||||
for (i = 0; i < fobj->shared_count; ++i) {
|
||||
struct dma_fence *old_fence;
|
||||
|
||||
old_fence = rcu_dereference_protected(fobj->shared[i],
|
||||
reservation_object_held(obj));
|
||||
|
||||
if (old_fence->context == fence->context) {
|
||||
/* memory barrier is added by write_seqcount_begin */
|
||||
RCU_INIT_POINTER(fobj->shared[i], fence);
|
||||
write_seqcount_end(&obj->seq);
|
||||
preempt_enable();
|
||||
|
||||
dma_fence_put(old_fence);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!signaled && dma_fence_is_signaled(old_fence)) {
|
||||
signaled = old_fence;
|
||||
signaled_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* memory barrier is added by write_seqcount_begin,
|
||||
* fobj->shared_count is protected by this lock too
|
||||
*/
|
||||
if (signaled) {
|
||||
RCU_INIT_POINTER(fobj->shared[signaled_idx], fence);
|
||||
else
|
||||
max = max(old->shared_count + num_fences,
|
||||
old->shared_max * 2);
|
||||
} else {
|
||||
BUG_ON(fobj->shared_count >= fobj->shared_max);
|
||||
RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
|
||||
fobj->shared_count++;
|
||||
max = 4;
|
||||
}
|
||||
|
||||
write_seqcount_end(&obj->seq);
|
||||
preempt_enable();
|
||||
|
||||
dma_fence_put(signaled);
|
||||
}
|
||||
|
||||
static void
|
||||
reservation_object_add_shared_replace(struct reservation_object *obj,
|
||||
struct reservation_object_list *old,
|
||||
struct reservation_object_list *fobj,
|
||||
struct dma_fence *fence)
|
||||
{
|
||||
unsigned i, j, k;
|
||||
|
||||
dma_fence_get(fence);
|
||||
|
||||
if (!old) {
|
||||
RCU_INIT_POINTER(fobj->shared[0], fence);
|
||||
fobj->shared_count = 1;
|
||||
goto done;
|
||||
}
|
||||
new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL);
|
||||
if (!new)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* no need to bump fence refcounts, rcu_read access
|
||||
|
@ -174,46 +95,45 @@ reservation_object_add_shared_replace(struct reservation_object *obj,
|
|||
* references from the old struct are carried over to
|
||||
* the new.
|
||||
*/
|
||||
for (i = 0, j = 0, k = fobj->shared_max; i < old->shared_count; ++i) {
|
||||
struct dma_fence *check;
|
||||
for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) {
|
||||
struct dma_fence *fence;
|
||||
|
||||
check = rcu_dereference_protected(old->shared[i],
|
||||
reservation_object_held(obj));
|
||||
|
||||
if (check->context == fence->context ||
|
||||
dma_fence_is_signaled(check))
|
||||
RCU_INIT_POINTER(fobj->shared[--k], check);
|
||||
fence = rcu_dereference_protected(old->shared[i],
|
||||
reservation_object_held(obj));
|
||||
if (dma_fence_is_signaled(fence))
|
||||
RCU_INIT_POINTER(new->shared[--k], fence);
|
||||
else
|
||||
RCU_INIT_POINTER(fobj->shared[j++], check);
|
||||
RCU_INIT_POINTER(new->shared[j++], fence);
|
||||
}
|
||||
fobj->shared_count = j;
|
||||
RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
|
||||
fobj->shared_count++;
|
||||
new->shared_count = j;
|
||||
new->shared_max = max;
|
||||
|
||||
done:
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&obj->seq);
|
||||
/*
|
||||
* RCU_INIT_POINTER can be used here,
|
||||
* seqcount provides the necessary barriers
|
||||
*/
|
||||
RCU_INIT_POINTER(obj->fence, fobj);
|
||||
RCU_INIT_POINTER(obj->fence, new);
|
||||
write_seqcount_end(&obj->seq);
|
||||
preempt_enable();
|
||||
|
||||
if (!old)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
/* Drop the references to the signaled fences */
|
||||
for (i = k; i < fobj->shared_max; ++i) {
|
||||
struct dma_fence *f;
|
||||
for (i = k; i < new->shared_max; ++i) {
|
||||
struct dma_fence *fence;
|
||||
|
||||
f = rcu_dereference_protected(fobj->shared[i],
|
||||
reservation_object_held(obj));
|
||||
dma_fence_put(f);
|
||||
fence = rcu_dereference_protected(new->shared[i],
|
||||
reservation_object_held(obj));
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
kfree_rcu(old, rcu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(reservation_object_reserve_shared);
|
||||
|
||||
/**
|
||||
* reservation_object_add_shared_fence - Add a fence to a shared slot
|
||||
|
@ -226,15 +146,39 @@ reservation_object_add_shared_replace(struct reservation_object *obj,
|
|||
void reservation_object_add_shared_fence(struct reservation_object *obj,
|
||||
struct dma_fence *fence)
|
||||
{
|
||||
struct reservation_object_list *old, *fobj = obj->staged;
|
||||
struct reservation_object_list *fobj;
|
||||
unsigned int i, count;
|
||||
|
||||
old = reservation_object_get_list(obj);
|
||||
obj->staged = NULL;
|
||||
dma_fence_get(fence);
|
||||
|
||||
if (!fobj)
|
||||
reservation_object_add_shared_inplace(obj, old, fence);
|
||||
else
|
||||
reservation_object_add_shared_replace(obj, old, fobj, fence);
|
||||
fobj = reservation_object_get_list(obj);
|
||||
count = fobj->shared_count;
|
||||
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&obj->seq);
|
||||
|
||||
for (i = 0; i < count; ++i) {
|
||||
struct dma_fence *old_fence;
|
||||
|
||||
old_fence = rcu_dereference_protected(fobj->shared[i],
|
||||
reservation_object_held(obj));
|
||||
if (old_fence->context == fence->context ||
|
||||
dma_fence_is_signaled(old_fence)) {
|
||||
dma_fence_put(old_fence);
|
||||
goto replace;
|
||||
}
|
||||
}
|
||||
|
||||
BUG_ON(fobj->shared_count >= fobj->shared_max);
|
||||
count++;
|
||||
|
||||
replace:
|
||||
RCU_INIT_POINTER(fobj->shared[i], fence);
|
||||
/* pointer update must be visible before we extend the shared_count */
|
||||
smp_store_mb(fobj->shared_count, count);
|
||||
|
||||
write_seqcount_end(&obj->seq);
|
||||
preempt_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(reservation_object_add_shared_fence);
|
||||
|
||||
|
@ -343,9 +287,6 @@ int reservation_object_copy_fences(struct reservation_object *dst,
|
|||
new = dma_fence_get_rcu_safe(&src->fence_excl);
|
||||
rcu_read_unlock();
|
||||
|
||||
kfree(dst->staged);
|
||||
dst->staged = NULL;
|
||||
|
||||
src_list = reservation_object_get_list(dst);
|
||||
old = reservation_object_get_excl(dst);
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
|
|||
drm_scatter.o drm_pci.o \
|
||||
drm_sysfs.o drm_hashtab.o drm_mm.o \
|
||||
drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
|
||||
drm_info.o drm_encoder_slave.o \
|
||||
drm_trace_points.o drm_global.o drm_prime.o \
|
||||
drm_encoder_slave.o \
|
||||
drm_trace_points.o drm_prime.o \
|
||||
drm_rect.o drm_vma_manager.o drm_flip_work.o \
|
||||
drm_modeset_lock.o drm_atomic.o drm_bridge.o \
|
||||
drm_framebuffer.o drm_connector.o drm_blend.o \
|
||||
|
@ -32,11 +32,12 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
|
|||
drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
|
||||
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
|
||||
|
||||
drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
|
||||
drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_dsc.o drm_probe_helper.o \
|
||||
drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
|
||||
drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
|
||||
drm_simple_kms_helper.o drm_modeset_helper.o \
|
||||
drm_scdc_helper.o drm_gem_framebuffer_helper.o
|
||||
drm_scdc_helper.o drm_gem_framebuffer_helper.o \
|
||||
drm_atomic_state_helper.o drm_damage_helper.o
|
||||
|
||||
drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
|
||||
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
|
||||
|
|
|
@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
|
|||
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
|
||||
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
|
||||
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
|
||||
amdgpu_gmc.o amdgpu_xgmi.o
|
||||
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o
|
||||
|
||||
# add asic specific block
|
||||
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
|
||||
|
@ -105,6 +105,7 @@ amdgpu-y += \
|
|||
# add GFX block
|
||||
amdgpu-y += \
|
||||
amdgpu_gfx.o \
|
||||
amdgpu_rlc.o \
|
||||
gfx_v8_0.o \
|
||||
gfx_v9_0.o
|
||||
|
||||
|
|
|
@ -75,11 +75,14 @@
|
|||
#include "amdgpu_sdma.h"
|
||||
#include "amdgpu_dm.h"
|
||||
#include "amdgpu_virt.h"
|
||||
#include "amdgpu_csa.h"
|
||||
#include "amdgpu_gart.h"
|
||||
#include "amdgpu_debugfs.h"
|
||||
#include "amdgpu_job.h"
|
||||
#include "amdgpu_bo_list.h"
|
||||
#include "amdgpu_gem.h"
|
||||
#include "amdgpu_doorbell.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
||||
#define MAX_GPU_INSTANCE 16
|
||||
|
||||
|
@ -161,6 +164,7 @@ extern int amdgpu_si_support;
|
|||
extern int amdgpu_cik_support;
|
||||
#endif
|
||||
|
||||
#define AMDGPU_VM_MAX_NUM_CTX 4096
|
||||
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
|
||||
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
|
||||
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
|
||||
|
@ -359,123 +363,6 @@ struct amdgpu_sa_bo {
|
|||
int amdgpu_fence_slab_init(void);
|
||||
void amdgpu_fence_slab_fini(void);
|
||||
|
||||
/*
|
||||
* GPU doorbell structures, functions & helpers
|
||||
*/
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
|
||||
{
|
||||
AMDGPU_DOORBELL_KIQ = 0x000,
|
||||
AMDGPU_DOORBELL_HIQ = 0x001,
|
||||
AMDGPU_DOORBELL_DIQ = 0x002,
|
||||
AMDGPU_DOORBELL_MEC_RING0 = 0x010,
|
||||
AMDGPU_DOORBELL_MEC_RING1 = 0x011,
|
||||
AMDGPU_DOORBELL_MEC_RING2 = 0x012,
|
||||
AMDGPU_DOORBELL_MEC_RING3 = 0x013,
|
||||
AMDGPU_DOORBELL_MEC_RING4 = 0x014,
|
||||
AMDGPU_DOORBELL_MEC_RING5 = 0x015,
|
||||
AMDGPU_DOORBELL_MEC_RING6 = 0x016,
|
||||
AMDGPU_DOORBELL_MEC_RING7 = 0x017,
|
||||
AMDGPU_DOORBELL_GFX_RING0 = 0x020,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
|
||||
AMDGPU_DOORBELL_IH = 0x1E8,
|
||||
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
|
||||
AMDGPU_DOORBELL_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL_ASSIGNMENT;
|
||||
|
||||
struct amdgpu_doorbell {
|
||||
/* doorbell mmio */
|
||||
resource_size_t base;
|
||||
resource_size_t size;
|
||||
u32 __iomem *ptr;
|
||||
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
|
||||
};
|
||||
|
||||
/*
|
||||
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
|
||||
*/
|
||||
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
|
||||
{
|
||||
/*
|
||||
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
|
||||
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
|
||||
* Compute related doorbells are allocated from 0x00 to 0x8a
|
||||
*/
|
||||
|
||||
|
||||
/* kernel scheduling */
|
||||
AMDGPU_DOORBELL64_KIQ = 0x00,
|
||||
|
||||
/* HSA interface queue and debug queue */
|
||||
AMDGPU_DOORBELL64_HIQ = 0x01,
|
||||
AMDGPU_DOORBELL64_DIQ = 0x02,
|
||||
|
||||
/* Compute engines */
|
||||
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
|
||||
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
|
||||
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
|
||||
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
|
||||
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
|
||||
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
|
||||
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
|
||||
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
|
||||
|
||||
/* User queue doorbell range (128 doorbells) */
|
||||
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
|
||||
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
|
||||
|
||||
/* Graphics engine */
|
||||
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
|
||||
|
||||
/*
|
||||
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
|
||||
* Graphics voltage island aperture 1
|
||||
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
|
||||
*/
|
||||
|
||||
/* sDMA engines reserved from 0xe0 -oxef */
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,
|
||||
|
||||
/* For vega10 sriov, the sdma doorbell must be fixed as follow
|
||||
* to keep the same setting with host driver, or it will
|
||||
* happen conflicts
|
||||
*/
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
|
||||
|
||||
/* Interrupt handler */
|
||||
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
|
||||
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
|
||||
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
|
||||
|
||||
/* VCN engine use 32 bits doorbell */
|
||||
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
|
||||
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
|
||||
|
||||
/* overlap the doorbell assignment with VCN as they are mutually exclusive
|
||||
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
|
||||
*/
|
||||
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
|
||||
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
|
||||
|
||||
AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
|
||||
AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
|
||||
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
|
||||
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
|
||||
|
||||
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
|
||||
AMDGPU_DOORBELL64_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL64_ASSIGNMENT;
|
||||
|
||||
/*
|
||||
* IRQS.
|
||||
*/
|
||||
|
@ -653,6 +540,8 @@ struct amdgpu_asic_funcs {
|
|||
struct amdgpu_ring *ring);
|
||||
/* check if the asic needs a full reset of if soft reset will work */
|
||||
bool (*need_full_reset)(struct amdgpu_device *adev);
|
||||
/* initialize doorbell layout for specific asic*/
|
||||
void (*init_doorbell_index)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -831,7 +720,6 @@ struct amdgpu_device {
|
|||
bool need_dma32;
|
||||
bool need_swiotlb;
|
||||
bool accel_working;
|
||||
struct work_struct reset_work;
|
||||
struct notifier_block acpi_nb;
|
||||
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
|
||||
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
|
||||
|
@ -976,6 +864,9 @@ struct amdgpu_device {
|
|||
/* GDS */
|
||||
struct amdgpu_gds gds;
|
||||
|
||||
/* KFD */
|
||||
struct amdgpu_kfd_dev kfd;
|
||||
|
||||
/* display related functionality */
|
||||
struct amdgpu_display_manager dm;
|
||||
|
||||
|
@ -989,9 +880,6 @@ struct amdgpu_device {
|
|||
atomic64_t visible_pin_size;
|
||||
atomic64_t gart_pin_size;
|
||||
|
||||
/* amdkfd interface */
|
||||
struct kfd_dev *kfd;
|
||||
|
||||
/* soc15 register offset based on ip, instance and segment */
|
||||
uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
|
||||
|
||||
|
@ -1023,6 +911,10 @@ struct amdgpu_device {
|
|||
unsigned long last_mm_index;
|
||||
bool in_gpu_reset;
|
||||
struct mutex lock_reset;
|
||||
struct amdgpu_doorbell_index doorbell_index;
|
||||
|
||||
int asic_reset_res;
|
||||
struct work_struct xgmi_reset_work;
|
||||
};
|
||||
|
||||
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
|
||||
|
@ -1047,11 +939,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
|
|||
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
|
||||
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
|
||||
|
||||
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
|
||||
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
|
||||
|
||||
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
|
||||
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
|
||||
|
||||
|
@ -1113,11 +1000,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
|||
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
|
||||
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))
|
||||
|
||||
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
|
||||
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
|
||||
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
|
||||
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
|
||||
|
||||
#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
|
||||
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
|
||||
|
||||
|
@ -1159,6 +1041,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
|
|||
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
|
||||
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
|
||||
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
|
||||
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
|
||||
|
||||
/* Common functions */
|
||||
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
|
||||
|
@ -1219,12 +1102,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
|
|||
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
|
||||
|
||||
/*
|
||||
* functions used by amdgpu_xgmi.c
|
||||
*/
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
* functions used by amdgpu_encoder.c
|
||||
*/
|
||||
|
@ -1252,6 +1129,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
|
|||
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
|
||||
u8 perf_req, bool advertise);
|
||||
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
|
||||
|
||||
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
|
||||
struct amdgpu_dm_backlight_caps *caps);
|
||||
#else
|
||||
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
|
||||
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
|
||||
|
|
|
@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg {
|
|||
};
|
||||
|
||||
struct amdgpu_atif_notifications {
|
||||
bool display_switch;
|
||||
bool expansion_mode_change;
|
||||
bool thermal_state;
|
||||
bool forced_power_state;
|
||||
bool system_power_state;
|
||||
bool display_conf_change;
|
||||
bool px_gfx_switch;
|
||||
bool brightness_change;
|
||||
bool dgpu_display_event;
|
||||
bool gpu_package_power_limit;
|
||||
};
|
||||
|
||||
struct amdgpu_atif_functions {
|
||||
bool system_params;
|
||||
bool sbios_requests;
|
||||
bool select_active_disp;
|
||||
bool lid_state;
|
||||
bool get_tv_standard;
|
||||
bool set_tv_standard;
|
||||
bool get_panel_expansion_mode;
|
||||
bool set_panel_expansion_mode;
|
||||
bool temperature_change;
|
||||
bool graphics_device_types;
|
||||
bool query_backlight_transfer_characteristics;
|
||||
bool ready_to_undock;
|
||||
bool external_gpu_information;
|
||||
};
|
||||
|
||||
struct amdgpu_atif {
|
||||
|
@ -72,6 +65,7 @@ struct amdgpu_atif {
|
|||
struct amdgpu_atif_functions functions;
|
||||
struct amdgpu_atif_notification_cfg notification_cfg;
|
||||
struct amdgpu_encoder *encoder_for_bl;
|
||||
struct amdgpu_dm_backlight_caps backlight_caps;
|
||||
};
|
||||
|
||||
/* Call the ATIF method
|
||||
|
@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
|
|||
*/
|
||||
static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
|
||||
{
|
||||
n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED;
|
||||
n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
|
||||
n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED;
|
||||
n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED;
|
||||
n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
|
||||
n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
|
||||
n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
|
|||
{
|
||||
f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
|
||||
f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
|
||||
f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED;
|
||||
f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED;
|
||||
f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED;
|
||||
f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED;
|
||||
f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED;
|
||||
f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED;
|
||||
f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
|
||||
f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED;
|
||||
f->query_backlight_transfer_characteristics =
|
||||
mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
|
||||
f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
|
||||
f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -310,6 +298,65 @@ static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
|
|||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
|
||||
*
|
||||
* @handle: acpi handle
|
||||
*
|
||||
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
|
||||
* to determine the acceptable range of backlight values
|
||||
*
|
||||
* Backlight_caps.caps_valid will be set to true if the query is successful
|
||||
*
|
||||
* The input signals are in range 0-255
|
||||
*
|
||||
* This function assumes the display with backlight is the first LCD
|
||||
*
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
|
||||
{
|
||||
union acpi_object *info;
|
||||
struct atif_qbtc_output characteristics;
|
||||
struct atif_qbtc_arguments arguments;
|
||||
struct acpi_buffer params;
|
||||
size_t size;
|
||||
int err = 0;
|
||||
|
||||
arguments.size = sizeof(arguments);
|
||||
arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;
|
||||
|
||||
params.length = sizeof(arguments);
|
||||
params.pointer = (void *)&arguments;
|
||||
|
||||
info = amdgpu_atif_call(atif,
|
||||
ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
|
||||
¶ms);
|
||||
if (!info) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
size = *(u16 *) info->buffer.pointer;
|
||||
if (size < 10) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
memset(&characteristics, 0, sizeof(characteristics));
|
||||
size = min(sizeof(characteristics), size);
|
||||
memcpy(&characteristics, info->buffer.pointer, size);
|
||||
|
||||
atif->backlight_caps.caps_valid = true;
|
||||
atif->backlight_caps.min_input_signal =
|
||||
characteristics.min_input_signal;
|
||||
atif->backlight_caps.max_input_signal =
|
||||
characteristics.max_input_signal;
|
||||
out:
|
||||
kfree(info);
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_atif_get_sbios_requests - get requested sbios event
|
||||
*
|
||||
|
@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
|
|||
}
|
||||
}
|
||||
|
||||
if (atif->functions.query_backlight_transfer_characteristics) {
|
||||
ret = amdgpu_atif_query_backlight_caps(atif);
|
||||
if (ret) {
|
||||
DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
|
||||
ret);
|
||||
atif->backlight_caps.caps_valid = false;
|
||||
}
|
||||
} else {
|
||||
atif->backlight_caps.caps_valid = false;
|
||||
}
|
||||
|
||||
out:
|
||||
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
|
||||
register_acpi_notifier(&adev->acpi_nb);
|
||||
|
@ -806,6 +864,18 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
|
|||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
|
||||
struct amdgpu_dm_backlight_caps *caps)
|
||||
{
|
||||
if (!adev->atif) {
|
||||
caps->caps_valid = false;
|
||||
return;
|
||||
}
|
||||
caps->caps_valid = adev->atif->backlight_caps.caps_valid;
|
||||
caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
|
||||
caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_acpi_fini - tear down driver acpi support
|
||||
*
|
||||
|
@ -816,6 +886,5 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
|
|||
void amdgpu_acpi_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
unregister_acpi_notifier(&adev->acpi_nb);
|
||||
if (adev->atif)
|
||||
kfree(adev->atif);
|
||||
kfree(adev->atif);
|
||||
}
|
||||
|
|
|
@ -26,15 +26,26 @@
|
|||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
const struct kgd2kfd_calls *kgd2kfd;
|
||||
|
||||
static const unsigned int compute_vmid_bitmap = 0xFF00;
|
||||
|
||||
/* Total memory size in system memory and all GPU VRAM. Used to
|
||||
* estimate worst case amount of memory to reserve for page tables
|
||||
*/
|
||||
uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
int amdgpu_amdkfd_init(void)
|
||||
{
|
||||
struct sysinfo si;
|
||||
int ret;
|
||||
|
||||
si_meminfo(&si);
|
||||
amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
|
||||
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
|
||||
|
||||
#ifdef CONFIG_HSA_AMD
|
||||
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
|
||||
if (ret)
|
||||
|
@ -73,9 +84,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
|||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_RAVEN:
|
||||
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
|
||||
|
@ -85,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
|
|||
return;
|
||||
}
|
||||
|
||||
adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
|
||||
adev->pdev, kfd2kgd);
|
||||
|
||||
if (adev->kfd.dev)
|
||||
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -126,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
{
|
||||
int i, n;
|
||||
int last_valid_bit;
|
||||
if (adev->kfd) {
|
||||
|
||||
if (adev->kfd.dev) {
|
||||
struct kgd2kfd_shared_resources gpu_resources = {
|
||||
.compute_vmid_bitmap = compute_vmid_bitmap,
|
||||
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
|
||||
|
@ -144,7 +161,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
KGD_MAX_QUEUES);
|
||||
|
||||
/* remove the KIQ bit as well */
|
||||
if (adev->gfx.kiq.ring.ready)
|
||||
if (adev->gfx.kiq.ring.sched.ready)
|
||||
clear_bit(amdgpu_gfx_queue_to_bit(adev,
|
||||
adev->gfx.kiq.ring.me - 1,
|
||||
adev->gfx.kiq.ring.pipe,
|
||||
|
@ -165,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
&gpu_resources.doorbell_start_offset);
|
||||
|
||||
if (adev->asic_type < CHIP_VEGA10) {
|
||||
kgd2kfd->device_init(adev->kfd, &gpu_resources);
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -179,25 +196,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
* process in case of 64-bit doorbells so we
|
||||
* can use each doorbell assignment twice.
|
||||
*/
|
||||
if (adev->asic_type == CHIP_VEGA10) {
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
|
||||
} else {
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
|
||||
}
|
||||
gpu_resources.sdma_doorbell[0][i] =
|
||||
adev->doorbell_index.sdma_engine0 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[0][i+1] =
|
||||
adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i] =
|
||||
adev->doorbell_index.sdma_engine1 + (i >> 1);
|
||||
gpu_resources.sdma_doorbell[1][i+1] =
|
||||
adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
|
||||
}
|
||||
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
|
||||
* SDMA, IH and VCN. So don't use them for the CP.
|
||||
|
@ -205,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
|
|||
gpu_resources.reserved_doorbell_mask = 0x1e0;
|
||||
gpu_resources.reserved_doorbell_val = 0x0e0;
|
||||
|
||||
kgd2kfd->device_init(adev->kfd, &gpu_resources);
|
||||
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd) {
|
||||
kgd2kfd->device_exit(adev->kfd);
|
||||
adev->kfd = NULL;
|
||||
if (adev->kfd.dev) {
|
||||
kgd2kfd->device_exit(adev->kfd.dev);
|
||||
adev->kfd.dev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
|
||||
const void *ih_ring_entry)
|
||||
{
|
||||
if (adev->kfd)
|
||||
kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->kfd)
|
||||
kgd2kfd->suspend(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
kgd2kfd->suspend(adev->kfd.dev);
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->resume(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->resume(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -244,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
|
|||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->pre_reset(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->pre_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -254,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
|
|||
{
|
||||
int r = 0;
|
||||
|
||||
if (adev->kfd)
|
||||
r = kgd2kfd->post_reset(adev->kfd);
|
||||
if (adev->kfd.dev)
|
||||
r = kgd2kfd->post_reset(adev->kfd.dev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -268,9 +274,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
|
|||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
}
|
||||
|
||||
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9)
|
||||
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_bo *bo = NULL;
|
||||
|
@ -340,7 +346,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
|||
return r;
|
||||
}
|
||||
|
||||
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
||||
{
|
||||
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
|
||||
|
||||
|
@ -351,8 +357,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
|
|||
amdgpu_bo_unref(&(bo));
|
||||
}
|
||||
|
||||
void get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info)
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
|
||||
|
@ -383,7 +389,7 @@ void get_local_mem_info(struct kgd_dev *kgd,
|
|||
mem_info->mem_clk_max = 100;
|
||||
}
|
||||
|
||||
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
|
||||
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
|
@ -392,7 +398,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
||||
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
||||
|
@ -405,7 +411,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
|
|||
return 100;
|
||||
}
|
||||
|
||||
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
||||
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
|
||||
|
@ -428,6 +434,62 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
|
|||
cu_info->lds_size = acu_info.lds_size;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
||||
struct kgd_dev **dma_buf_kgd,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct dma_buf *dma_buf;
|
||||
struct drm_gem_object *obj;
|
||||
struct amdgpu_bo *bo;
|
||||
uint64_t metadata_flags;
|
||||
int r = -EINVAL;
|
||||
|
||||
dma_buf = dma_buf_get(dma_buf_fd);
|
||||
if (IS_ERR(dma_buf))
|
||||
return PTR_ERR(dma_buf);
|
||||
|
||||
if (dma_buf->ops != &amdgpu_dmabuf_ops)
|
||||
/* Can't handle non-graphics buffers */
|
||||
goto out_put;
|
||||
|
||||
obj = dma_buf->priv;
|
||||
if (obj->dev->driver != adev->ddev->driver)
|
||||
/* Can't handle buffers from different drivers */
|
||||
goto out_put;
|
||||
|
||||
adev = obj->dev->dev_private;
|
||||
bo = gem_to_amdgpu_bo(obj);
|
||||
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT)))
|
||||
/* Only VRAM and GTT BOs are supported */
|
||||
goto out_put;
|
||||
|
||||
r = 0;
|
||||
if (dma_buf_kgd)
|
||||
*dma_buf_kgd = (struct kgd_dev *)adev;
|
||||
if (bo_size)
|
||||
*bo_size = amdgpu_bo_size(bo);
|
||||
if (metadata_size)
|
||||
*metadata_size = bo->metadata_size;
|
||||
if (metadata_buffer)
|
||||
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
|
||||
metadata_size, &metadata_flags);
|
||||
if (flags) {
|
||||
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
|
||||
ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
|
||||
|
||||
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
|
||||
*flags |= ALLOC_MEM_FLAGS_PUBLIC;
|
||||
}
|
||||
|
||||
out_put:
|
||||
dma_buf_put(dma_buf);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
|
@ -510,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
|
|||
|
||||
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
|
||||
{
|
||||
if (adev->kfd) {
|
||||
if (adev->kfd.dev) {
|
||||
if ((1 << vmid) & compute_vmid_bitmap)
|
||||
return true;
|
||||
}
|
||||
|
@ -524,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
|
|||
return false;
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
|
||||
{
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <kgd_kfd_interface.h>
|
||||
#include <drm/ttm/ttm_execbuf_util.h>
|
||||
|
@ -35,6 +34,7 @@
|
|||
#include "amdgpu_vm.h"
|
||||
|
||||
extern const struct kgd2kfd_calls *kgd2kfd;
|
||||
extern uint64_t amdgpu_amdkfd_total_mem_size;
|
||||
|
||||
struct amdgpu_device;
|
||||
|
||||
|
@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence {
|
|||
char timeline_name[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct amdgpu_kfd_dev {
|
||||
struct kfd_dev *dev;
|
||||
uint64_t vram_used;
|
||||
};
|
||||
|
||||
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
|
||||
struct mm_struct *mm);
|
||||
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
|
||||
|
@ -134,16 +139,21 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
|
|||
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
|
||||
|
||||
/* Shared API */
|
||||
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9);
|
||||
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||
void get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info);
|
||||
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
|
||||
int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
|
||||
void **mem_obj, uint64_t *gpu_addr,
|
||||
void **cpu_ptr, bool mqd_gfx9);
|
||||
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
|
||||
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
|
||||
struct kfd_local_mem_info *mem_info);
|
||||
uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
|
||||
|
||||
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
|
||||
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
|
||||
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
|
||||
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
|
||||
int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
|
||||
struct kgd_dev **dmabuf_kgd,
|
||||
uint64_t *bo_size, void *metadata_buffer,
|
||||
size_t buffer_size, uint32_t *metadata_size,
|
||||
uint32_t *flags);
|
||||
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
|
||||
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
|
||||
|
||||
|
@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
|
|||
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
||||
struct kfd_vm_fault_info *info);
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
struct dma_buf *dmabuf,
|
||||
uint64_t va, void *vm,
|
||||
struct kgd_mem **mem, uint64_t *size,
|
||||
uint64_t *mmap_offset);
|
||||
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
|
||||
|
||||
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
@ -173,13 +174,6 @@ static int get_tile_config(struct kgd_dev *kgd,
|
|||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
|
@ -200,28 +194,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
@ -128,13 +129,6 @@ static int get_tile_config(struct kgd_dev *kgd,
|
|||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
|
@ -157,27 +151,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
|
||||
};
|
||||
|
||||
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/fdtable.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_amdkfd.h"
|
||||
|
@ -46,38 +47,9 @@
|
|||
#include "v9_structs.h"
|
||||
#include "soc15.h"
|
||||
#include "soc15d.h"
|
||||
#include "mmhub_v1_0.h"
|
||||
#include "gfxhub_v1_0.h"
|
||||
|
||||
/* HACK: MMHUB and GC both have VM-related register with the same
|
||||
* names but different offsets. Define the MMHUB register we need here
|
||||
* with a prefix. A proper solution would be to move the functions
|
||||
* programming these registers into gfx_v9_0.c and mmhub_v1_0.c
|
||||
* respectively.
|
||||
*/
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
|
||||
#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
|
||||
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
|
||||
#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
|
||||
|
||||
#define V9_PIPE_PER_MEC (4)
|
||||
#define V9_QUEUES_PER_PIPE_MEC (8)
|
||||
|
@ -167,13 +139,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
|
|||
}
|
||||
|
||||
static const struct kfd2kgd_calls kfd2kgd = {
|
||||
.init_gtt_mem_allocation = alloc_gtt_mem,
|
||||
.free_gtt_mem = free_gtt_mem,
|
||||
.get_local_mem_info = get_local_mem_info,
|
||||
.get_gpu_clock_counter = get_gpu_clock_counter,
|
||||
.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
|
||||
.alloc_pasid = amdgpu_pasid_alloc,
|
||||
.free_pasid = amdgpu_pasid_free,
|
||||
.program_sh_mem_settings = kgd_program_sh_mem_settings,
|
||||
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
|
||||
.init_interrupts = kgd_init_interrupts,
|
||||
|
@ -196,26 +161,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
|
|||
.get_fw_version = get_fw_version,
|
||||
.set_scratch_backing_va = set_scratch_backing_va,
|
||||
.get_tile_config = amdgpu_amdkfd_get_tile_config,
|
||||
.get_cu_info = get_cu_info,
|
||||
.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
|
||||
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
|
||||
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
|
||||
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
|
||||
.release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
|
||||
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
|
||||
.set_vm_context_page_table_base = set_vm_context_page_table_base,
|
||||
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
|
||||
.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
|
||||
.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
|
||||
.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
|
||||
.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
|
||||
.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
|
||||
.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
|
||||
.invalidate_tlbs = invalidate_tlbs,
|
||||
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
|
||||
.submit_ib = amdgpu_amdkfd_submit_ib,
|
||||
.gpu_recover = amdgpu_amdkfd_gpu_reset,
|
||||
.set_compute_idle = amdgpu_amdkfd_set_compute_idle,
|
||||
.get_hive_id = amdgpu_amdkfd_get_hive_id,
|
||||
};
|
||||
|
||||
|
@ -785,15 +733,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
|
|||
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
|
||||
uint32_t req = (1 << vmid) |
|
||||
(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
|
||||
VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
|
||||
|
||||
mutex_lock(&adev->srbm_mutex);
|
||||
|
||||
/* Use legacy mode tlb invalidation.
|
||||
*
|
||||
|
@ -810,34 +749,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
|
|||
* TODO 2: support range-based invalidation, requires kfg2kgd
|
||||
* interface change
|
||||
*/
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
|
||||
0xffffffff);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
|
||||
0x0000001f);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
|
||||
0xffffffff);
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
|
||||
0x0000001f);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
|
||||
req);
|
||||
|
||||
while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
|
||||
(1 << vmid)))
|
||||
cpu_relax();
|
||||
|
||||
while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
|
||||
mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
|
||||
(1 << vmid)))
|
||||
cpu_relax();
|
||||
|
||||
mutex_unlock(&adev->srbm_mutex);
|
||||
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
|
||||
}
|
||||
|
||||
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
|
||||
|
@ -876,7 +788,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
|
|||
if (adev->in_gpu_reset)
|
||||
return -EIO;
|
||||
|
||||
if (ring->ready)
|
||||
if (ring->sched.ready)
|
||||
return invalidate_tlbs_with_kiq(adev, pasid);
|
||||
|
||||
for (vmid = 0; vmid < 16; vmid++) {
|
||||
|
@ -1016,7 +928,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
|||
uint64_t page_table_base)
|
||||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
|
||||
|
||||
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
|
||||
pr_err("trying to set page table base for wrong VMID %u\n",
|
||||
|
@ -1028,25 +939,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
|
|||
* now, all processes share the same address space size, like
|
||||
* on GFX8 and older.
|
||||
*/
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
|
||||
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
|
||||
lower_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
|
||||
upper_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
|
||||
WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
|
||||
lower_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
|
||||
upper_32_bits(adev->vm_manager.max_pfn - 1));
|
||||
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
|
||||
WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
|
||||
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <linux/list.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <drm/drmP.h>
|
||||
#include "amdgpu_object.h"
|
||||
#include "amdgpu_vm.h"
|
||||
|
@ -46,9 +47,9 @@
|
|||
/* Impose limit on how much memory KFD can use */
|
||||
static struct {
|
||||
uint64_t max_system_mem_limit;
|
||||
uint64_t max_userptr_mem_limit;
|
||||
uint64_t max_ttm_mem_limit;
|
||||
int64_t system_mem_used;
|
||||
int64_t userptr_mem_used;
|
||||
int64_t ttm_mem_used;
|
||||
spinlock_t mem_limit_lock;
|
||||
} kfd_mem_limit;
|
||||
|
||||
|
@ -90,8 +91,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
|
|||
}
|
||||
|
||||
/* Set memory usage limits. Current, limits are
|
||||
* System (kernel) memory - 3/8th System RAM
|
||||
* Userptr memory - 3/4th System RAM
|
||||
* System (TTM + userptr) memory - 3/4th System RAM
|
||||
* TTM memory - 3/8th System RAM
|
||||
*/
|
||||
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
||||
{
|
||||
|
@ -103,48 +104,61 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
|
|||
mem *= si.mem_unit;
|
||||
|
||||
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
|
||||
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
|
||||
kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
|
||||
pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
|
||||
kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
|
||||
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
|
||||
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
|
||||
(kfd_mem_limit.max_system_mem_limit >> 20),
|
||||
(kfd_mem_limit.max_userptr_mem_limit >> 20));
|
||||
(kfd_mem_limit.max_ttm_mem_limit >> 20));
|
||||
}
|
||||
|
||||
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain)
|
||||
static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain, bool sg)
|
||||
{
|
||||
size_t acc_size;
|
||||
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
|
||||
uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
|
||||
int ret = 0;
|
||||
|
||||
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
|
||||
sizeof(struct amdgpu_bo));
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
vram_needed = 0;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
if (kfd_mem_limit.system_mem_used + (acc_size + size) >
|
||||
kfd_mem_limit.max_system_mem_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto err_no_mem;
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
||||
if ((kfd_mem_limit.system_mem_used + acc_size >
|
||||
kfd_mem_limit.max_system_mem_limit) ||
|
||||
(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
|
||||
kfd_mem_limit.max_userptr_mem_limit)) {
|
||||
ret = -ENOMEM;
|
||||
goto err_no_mem;
|
||||
}
|
||||
kfd_mem_limit.system_mem_used += acc_size;
|
||||
kfd_mem_limit.userptr_mem_used += size;
|
||||
/* TTM GTT memory */
|
||||
system_mem_needed = acc_size + size;
|
||||
ttm_mem_needed = acc_size + size;
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||
/* Userptr */
|
||||
system_mem_needed = acc_size + size;
|
||||
ttm_mem_needed = acc_size;
|
||||
} else {
|
||||
/* VRAM and SG */
|
||||
system_mem_needed = acc_size;
|
||||
ttm_mem_needed = acc_size;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_VRAM)
|
||||
vram_needed = size;
|
||||
}
|
||||
err_no_mem:
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
|
||||
kfd_mem_limit.max_system_mem_limit) ||
|
||||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev->kfd.vram_used + vram_needed >
|
||||
adev->gmc.real_vram_size - reserved_for_pt)) {
|
||||
ret = -ENOMEM;
|
||||
} else {
|
||||
kfd_mem_limit.system_mem_used += system_mem_needed;
|
||||
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
|
||||
adev->kfd.vram_used += vram_needed;
|
||||
}
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain)
|
||||
static void unreserve_mem_limit(struct amdgpu_device *adev,
|
||||
uint64_t size, u32 domain, bool sg)
|
||||
{
|
||||
size_t acc_size;
|
||||
|
||||
|
@ -154,35 +168,39 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
|
|||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
if (domain == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
|
||||
kfd_mem_limit.ttm_mem_used -= (acc_size + size);
|
||||
} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
|
||||
kfd_mem_limit.system_mem_used -= (acc_size + size);
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
} else {
|
||||
kfd_mem_limit.system_mem_used -= acc_size;
|
||||
kfd_mem_limit.userptr_mem_used -= size;
|
||||
kfd_mem_limit.ttm_mem_used -= acc_size;
|
||||
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
|
||||
adev->kfd.vram_used -= size;
|
||||
WARN_ONCE(adev->kfd.vram_used < 0,
|
||||
"kfd VRAM memory accounting unbalanced");
|
||||
}
|
||||
}
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
"kfd system memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
||||
"kfd userptr memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
|
||||
"kfd TTM memory accounting unbalanced");
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
}
|
||||
|
||||
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
|
||||
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
|
||||
{
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
|
||||
u32 domain = bo->preferred_domains;
|
||||
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
|
||||
|
||||
if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
|
||||
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
|
||||
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
|
||||
} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
|
||||
kfd_mem_limit.system_mem_used -=
|
||||
(bo->tbo.acc_size + amdgpu_bo_size(bo));
|
||||
domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
sg = false;
|
||||
}
|
||||
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
|
||||
"kfd system memory accounting unbalanced");
|
||||
WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
|
||||
"kfd userptr memory accounting unbalanced");
|
||||
|
||||
spin_unlock(&kfd_mem_limit.mem_limit_lock);
|
||||
unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
|
||||
}
|
||||
|
||||
|
||||
|
@ -395,23 +413,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
struct dma_fence *f)
|
||||
{
|
||||
int ret = amdgpu_sync_fence(adev, sync, f, false);
|
||||
|
||||
/* Sync objects can't handle multiple GPUs (contexts) updating
|
||||
* sync->last_vm_update. Fortunately we don't need it for
|
||||
* KFD's purposes, so we can just drop that fence.
|
||||
*/
|
||||
if (sync->last_vm_update) {
|
||||
dma_fence_put(sync->last_vm_update);
|
||||
sync->last_vm_update = NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_bo *pd = vm->root.base.bo;
|
||||
|
@ -422,7 +423,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
return sync_vm_fence(adev, sync, vm->last_update);
|
||||
return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
|
||||
}
|
||||
|
||||
/* add_bo_to_vm - Add a BO to a VM
|
||||
|
@ -536,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
|
|||
struct amdgpu_bo *bo = mem->bo;
|
||||
|
||||
INIT_LIST_HEAD(&entry->head);
|
||||
entry->shared = true;
|
||||
entry->num_shared = 1;
|
||||
entry->bo = &bo->tbo;
|
||||
mutex_lock(&process_info->lock);
|
||||
if (userptr)
|
||||
|
@ -677,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
|
|||
|
||||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.shared = true;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
|
@ -741,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
|
|||
|
||||
ctx->kfd_bo.priority = 0;
|
||||
ctx->kfd_bo.tv.bo = &bo->tbo;
|
||||
ctx->kfd_bo.tv.shared = true;
|
||||
ctx->kfd_bo.tv.num_shared = 1;
|
||||
ctx->kfd_bo.user_pages = NULL;
|
||||
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
|
||||
|
||||
|
@ -826,7 +827,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
|
|||
/* Add the eviction fence back */
|
||||
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
|
||||
|
||||
sync_vm_fence(adev, sync, bo_va->last_pt_update);
|
||||
amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -851,7 +852,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
|
|||
return ret;
|
||||
}
|
||||
|
||||
return sync_vm_fence(adev, sync, bo_va->last_pt_update);
|
||||
return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
|
||||
}
|
||||
|
||||
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
|
||||
|
@ -886,6 +887,24 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
|
||||
{
|
||||
struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
|
||||
|
||||
if (!sg)
|
||||
return NULL;
|
||||
if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
|
||||
kfree(sg);
|
||||
return NULL;
|
||||
}
|
||||
sg->sgl->dma_address = addr;
|
||||
sg->sgl->length = size;
|
||||
#ifdef CONFIG_NEED_SG_DMA_LENGTH
|
||||
sg->sgl->dma_length = size;
|
||||
#endif
|
||||
return sg;
|
||||
}
|
||||
|
||||
static int process_validate_vms(struct amdkfd_process_info *process_info)
|
||||
{
|
||||
struct amdgpu_vm *peer_vm;
|
||||
|
@ -901,6 +920,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
|
||||
struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_vm *peer_vm;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(peer_vm, &process_info->vm_list_head,
|
||||
vm_list_node) {
|
||||
struct amdgpu_bo *pd = peer_vm->root.base.bo;
|
||||
|
||||
ret = amdgpu_sync_resv(NULL,
|
||||
sync, pd->tbo.resv,
|
||||
AMDGPU_FENCE_OWNER_UNDEFINED, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int process_update_pds(struct amdkfd_process_info *process_info,
|
||||
struct amdgpu_sync *sync)
|
||||
{
|
||||
|
@ -1149,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
{
|
||||
struct amdgpu_device *adev = get_amdgpu_device(kgd);
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
enum ttm_bo_type bo_type = ttm_bo_type_device;
|
||||
struct sg_table *sg = NULL;
|
||||
uint64_t user_addr = 0;
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_bo_param bp;
|
||||
|
@ -1177,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
if (!offset || !*offset)
|
||||
return -EINVAL;
|
||||
user_addr = *offset;
|
||||
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
|
||||
domain = AMDGPU_GEM_DOMAIN_GTT;
|
||||
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
|
||||
bo_type = ttm_bo_type_sg;
|
||||
alloc_flags = 0;
|
||||
if (size > UINT_MAX)
|
||||
return -EINVAL;
|
||||
sg = create_doorbell_sg(*offset, size);
|
||||
if (!sg)
|
||||
return -ENOMEM;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||
if (!*mem)
|
||||
return -ENOMEM;
|
||||
if (!*mem) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
INIT_LIST_HEAD(&(*mem)->bo_va_list);
|
||||
mutex_init(&(*mem)->lock);
|
||||
(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
|
||||
|
@ -1199,7 +1252,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
byte_align = (adev->family == AMDGPU_FAMILY_VI &&
|
||||
adev->asic_type != CHIP_FIJI &&
|
||||
adev->asic_type != CHIP_POLARIS10 &&
|
||||
adev->asic_type != CHIP_POLARIS11) ?
|
||||
adev->asic_type != CHIP_POLARIS11 &&
|
||||
adev->asic_type != CHIP_POLARIS12) ?
|
||||
VI_BO_SIZE_ALIGN : 1;
|
||||
|
||||
mapping_flags = AMDGPU_VM_PAGE_READABLE;
|
||||
|
@ -1215,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
|
||||
ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
|
||||
if (ret) {
|
||||
pr_debug("Insufficient system memory\n");
|
||||
goto err_reserve_system_mem;
|
||||
goto err_reserve_limit;
|
||||
}
|
||||
|
||||
pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
|
||||
|
@ -1229,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
bp.byte_align = byte_align;
|
||||
bp.domain = alloc_domain;
|
||||
bp.flags = alloc_flags;
|
||||
bp.type = ttm_bo_type_device;
|
||||
bp.type = bo_type;
|
||||
bp.resv = NULL;
|
||||
ret = amdgpu_bo_create(adev, &bp, &bo);
|
||||
if (ret) {
|
||||
|
@ -1237,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
domain_string(alloc_domain), ret);
|
||||
goto err_bo_create;
|
||||
}
|
||||
if (bo_type == ttm_bo_type_sg) {
|
||||
bo->tbo.sg = sg;
|
||||
bo->tbo.ttm->sg = sg;
|
||||
}
|
||||
bo->kfd_bo = *mem;
|
||||
(*mem)->bo = bo;
|
||||
if (user_addr)
|
||||
|
@ -1266,12 +1324,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
|
|||
allocate_init_user_pages_failed:
|
||||
amdgpu_bo_unref(&bo);
|
||||
/* Don't unreserve system mem limit twice */
|
||||
goto err_reserve_system_mem;
|
||||
goto err_reserve_limit;
|
||||
err_bo_create:
|
||||
unreserve_system_mem_limit(adev, size, alloc_domain);
|
||||
err_reserve_system_mem:
|
||||
unreserve_mem_limit(adev, size, alloc_domain, !!sg);
|
||||
err_reserve_limit:
|
||||
mutex_destroy(&(*mem)->lock);
|
||||
kfree(*mem);
|
||||
err:
|
||||
if (sg) {
|
||||
sg_free_table(sg);
|
||||
kfree(sg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1341,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
|
|||
/* Free the sync object */
|
||||
amdgpu_sync_free(&mem->sync);
|
||||
|
||||
/* If the SG is not NULL, it's one we created for a doorbell
|
||||
* BO. We need to free it.
|
||||
*/
|
||||
if (mem->bo->tbo.sg) {
|
||||
sg_free_table(mem->bo->tbo.sg);
|
||||
kfree(mem->bo->tbo.sg);
|
||||
}
|
||||
|
||||
/* Free the BO*/
|
||||
amdgpu_bo_unref(&mem->bo);
|
||||
mutex_destroy(&mem->lock);
|
||||
|
@ -1405,7 +1476,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
|
|||
* the queues are still stopped and we can leave mapping for
|
||||
* the next restore worker
|
||||
*/
|
||||
if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
|
||||
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
|
||||
bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
|
||||
is_invalid_userptr = true;
|
||||
|
||||
if (check_if_add_bo_to_vm(avm, mem)) {
|
||||
|
@ -1642,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
|
||||
struct dma_buf *dma_buf,
|
||||
uint64_t va, void *vm,
|
||||
struct kgd_mem **mem, uint64_t *size,
|
||||
uint64_t *mmap_offset)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
|
||||
struct drm_gem_object *obj;
|
||||
struct amdgpu_bo *bo;
|
||||
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
|
||||
|
||||
if (dma_buf->ops != &amdgpu_dmabuf_ops)
|
||||
/* Can't handle non-graphics buffers */
|
||||
return -EINVAL;
|
||||
|
||||
obj = dma_buf->priv;
|
||||
if (obj->dev->dev_private != adev)
|
||||
/* Can't handle buffers from other devices */
|
||||
return -EINVAL;
|
||||
|
||||
bo = gem_to_amdgpu_bo(obj);
|
||||
if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
|
||||
AMDGPU_GEM_DOMAIN_GTT)))
|
||||
/* Only VRAM and GTT BOs are supported */
|
||||
return -EINVAL;
|
||||
|
||||
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
|
||||
if (!*mem)
|
||||
return -ENOMEM;
|
||||
|
||||
if (size)
|
||||
*size = amdgpu_bo_size(bo);
|
||||
|
||||
if (mmap_offset)
|
||||
*mmap_offset = amdgpu_bo_mmap_offset(bo);
|
||||
|
||||
INIT_LIST_HEAD(&(*mem)->bo_va_list);
|
||||
mutex_init(&(*mem)->lock);
|
||||
(*mem)->mapping_flags =
|
||||
AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
|
||||
AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
|
||||
|
||||
(*mem)->bo = amdgpu_bo_ref(bo);
|
||||
(*mem)->va = va;
|
||||
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
|
||||
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
|
||||
(*mem)->mapped_to_gpu_memory = 0;
|
||||
(*mem)->process_info = avm->process_info;
|
||||
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
|
||||
amdgpu_sync_create(&(*mem)->sync);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Evict a userptr BO by stopping the queues if necessary
|
||||
*
|
||||
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
|
||||
|
@ -1808,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
|
|||
validate_list.head) {
|
||||
list_add_tail(&mem->resv_list.head, &resv_list);
|
||||
mem->resv_list.bo = mem->validate_list.bo;
|
||||
mem->resv_list.shared = mem->validate_list.shared;
|
||||
mem->resv_list.num_shared = mem->validate_list.num_shared;
|
||||
}
|
||||
|
||||
/* Reserve all BOs and page tables for validation */
|
||||
|
@ -2027,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
|
||||
list_add_tail(&mem->resv_list.head, &ctx.list);
|
||||
mem->resv_list.bo = mem->validate_list.bo;
|
||||
mem->resv_list.shared = mem->validate_list.shared;
|
||||
mem->resv_list.num_shared = mem->validate_list.num_shared;
|
||||
}
|
||||
|
||||
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
|
||||
|
@ -2044,13 +2170,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
if (ret)
|
||||
goto validate_map_fail;
|
||||
|
||||
/* Wait for PD/PTs validate to finish */
|
||||
/* FIXME: I think this isn't needed */
|
||||
list_for_each_entry(peer_vm, &process_info->vm_list_head,
|
||||
vm_list_node) {
|
||||
struct amdgpu_bo *bo = peer_vm->root.base.bo;
|
||||
|
||||
ttm_bo_wait(&bo->tbo, false, false);
|
||||
ret = process_sync_pds_resv(process_info, &sync_obj);
|
||||
if (ret) {
|
||||
pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Validate BOs and map them to GPUVM (update VM page tables). */
|
||||
|
@ -2066,7 +2189,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
pr_debug("Memory eviction: Validate BOs failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
|
||||
if (ret) {
|
||||
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
|
||||
goto validate_map_fail;
|
||||
}
|
||||
list_for_each_entry(bo_va_entry, &mem->bo_va_list,
|
||||
bo_list) {
|
||||
ret = update_gpuvm_pte((struct amdgpu_device *)
|
||||
|
@ -2087,6 +2214,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
goto validate_map_fail;
|
||||
}
|
||||
|
||||
/* Wait for validate and PT updates to finish */
|
||||
amdgpu_sync_wait(&sync_obj, false);
|
||||
|
||||
/* Release old eviction fence and create new one, because fence only
|
||||
|
@ -2105,10 +2233,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
|
|||
process_info->eviction_fence = new_fence;
|
||||
*ef = dma_fence_get(&new_fence->base);
|
||||
|
||||
/* Wait for validate to finish and attach new eviction fence */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list.head)
|
||||
ttm_bo_wait(&mem->bo->tbo, false, false);
|
||||
/* Attach new eviction fence to all BOs */
|
||||
list_for_each_entry(mem, &process_info->kfd_bo_list,
|
||||
validate_list.head)
|
||||
amdgpu_bo_fence(mem->bo,
|
||||
|
|
|
@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
|
|||
entry->priority = min(info[i].bo_priority,
|
||||
AMDGPU_BO_LIST_MAX_PRIORITY);
|
||||
entry->tv.bo = &bo->tbo;
|
||||
entry->tv.shared = !bo->prime_shared_count;
|
||||
|
||||
if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
|
||||
list->gds_obj = bo;
|
||||
|
|
|
@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
|
|||
bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
|
||||
p->uf_entry.priority = 0;
|
||||
p->uf_entry.tv.bo = &bo->tbo;
|
||||
p->uf_entry.tv.shared = true;
|
||||
/* One for TTM and one for the CS job */
|
||||
p->uf_entry.tv.num_shared = 2;
|
||||
p->uf_entry.user_pages = NULL;
|
||||
|
||||
drm_gem_object_put_unlocked(gobj);
|
||||
|
@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
|||
return r;
|
||||
}
|
||||
|
||||
/* One for TTM and one for the CS job */
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list)
|
||||
e->tv.num_shared = 2;
|
||||
|
||||
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
|
||||
if (p->bo_list->first_userptr != p->bo_list->num_entries)
|
||||
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
|
||||
|
@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
|
|||
gws = p->bo_list->gws_obj;
|
||||
oa = p->bo_list->oa_obj;
|
||||
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list)
|
||||
e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
|
||||
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
|
||||
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
|
||||
|
||||
/* Make sure we use the exclusive slot for shared BOs */
|
||||
if (bo->prime_shared_count)
|
||||
e->tv.num_shared = 0;
|
||||
e->bo_va = amdgpu_vm_bo_find(vm, bo);
|
||||
}
|
||||
|
||||
if (gds) {
|
||||
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
|
||||
|
@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
|
||||
|
||||
if (amdgpu_vm_debug) {
|
||||
|
@ -1104,7 +1111,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
|
|||
{
|
||||
int r;
|
||||
struct dma_fence *fence;
|
||||
r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
|
||||
r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -1193,7 +1200,7 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
|
||||
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
|
||||
drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
|
||||
}
|
||||
|
||||
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
||||
|
@ -1260,8 +1267,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
|
|||
return 0;
|
||||
|
||||
error_abort:
|
||||
dma_fence_put(&job->base.s_fence->finished);
|
||||
job->base.s_fence = NULL;
|
||||
drm_sched_job_cleanup(&job->base);
|
||||
amdgpu_mn_unlock(p->mn);
|
||||
|
||||
error_unlock:
|
||||
|
@ -1285,7 +1291,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
|||
|
||||
r = amdgpu_cs_parser_init(&parser, data);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to initialize parser !\n");
|
||||
DRM_ERROR("Failed to initialize parser %d!\n", r);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
* * Author: Monk.liu@amd.com
|
||||
*/
|
||||
|
||||
#include "amdgpu.h"
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
|
||||
|
||||
addr -= AMDGPU_VA_RESERVED_SIZE;
|
||||
addr = amdgpu_gmc_sign_extend(addr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
|
||||
u32 domain, uint32_t size)
|
||||
{
|
||||
int r;
|
||||
void *ptr;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
|
||||
domain, bo,
|
||||
NULL, &ptr);
|
||||
if (!*bo)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(ptr, 0, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_free_static_csa(struct amdgpu_bo **bo)
|
||||
{
|
||||
amdgpu_bo_free_kernel(bo, NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_map_static_csa should be called during amdgpu_vm_init
|
||||
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
|
||||
* submission of GFX should use this virtual address within META_DATA init
|
||||
* package to support SRIOV gfx preemption.
|
||||
*/
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
|
||||
uint64_t csa_addr, uint32_t size)
|
||||
{
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct list_head list;
|
||||
struct amdgpu_bo_list_entry pd;
|
||||
struct ttm_validate_buffer csa_tv;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&list);
|
||||
INIT_LIST_HEAD(&csa_tv.head);
|
||||
csa_tv.bo = &bo->tbo;
|
||||
csa_tv.num_shared = 1;
|
||||
|
||||
list_add(&csa_tv.head, &list);
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &pd);
|
||||
|
||||
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
|
||||
if (!*bo_va) {
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
DRM_ERROR("failed to create bo_va for static CSA\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
|
||||
size);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
|
||||
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
|
||||
AMDGPU_PTE_EXECUTABLE);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Author: Monk.liu@amd.com
|
||||
*/
|
||||
|
||||
#ifndef AMDGPU_CSA_MANAGER_H
|
||||
#define AMDGPU_CSA_MANAGER_H
|
||||
|
||||
#define AMDGPU_CSA_SIZE (128 * 1024)
|
||||
|
||||
uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev);
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,
|
||||
u32 domain, uint32_t size);
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
|
||||
uint64_t csa_addr, uint32_t size);
|
||||
void amdgpu_free_static_csa(struct amdgpu_bo **bo);
|
||||
|
||||
#endif
|
|
@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
|
|||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&mgr->lock);
|
||||
r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
|
||||
r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
|
||||
if (r < 0) {
|
||||
mutex_unlock(&mgr->lock);
|
||||
kfree(ctx);
|
||||
|
|
|
@ -59,6 +59,8 @@
|
|||
#include "amdgpu_amdkfd.h"
|
||||
#include "amdgpu_pm.h"
|
||||
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
|
||||
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
|
||||
|
@ -513,6 +515,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
|
|||
*/
|
||||
static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
|
||||
{
|
||||
|
||||
/* No doorbell on SI hardware generation */
|
||||
if (adev->asic_type < CHIP_BONAIRE) {
|
||||
adev->doorbell.base = 0;
|
||||
|
@ -525,15 +528,26 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
|
|||
if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
|
||||
return -EINVAL;
|
||||
|
||||
amdgpu_asic_init_doorbell_index(adev);
|
||||
|
||||
/* doorbell bar mapping */
|
||||
adev->doorbell.base = pci_resource_start(adev->pdev, 2);
|
||||
adev->doorbell.size = pci_resource_len(adev->pdev, 2);
|
||||
|
||||
adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32),
|
||||
AMDGPU_DOORBELL_MAX_ASSIGNMENT+1);
|
||||
adev->doorbell_index.max_assignment+1);
|
||||
if (adev->doorbell.num_doorbells == 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* For Vega, reserve and map two pages on doorbell BAR since SDMA
|
||||
* paging queue doorbell use the second page. The
|
||||
* AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
|
||||
* doorbells are in the first page. So with paging queue enabled,
|
||||
* the max num_doorbells should + 1 page (0x400 in dword)
|
||||
*/
|
||||
if (adev->asic_type >= CHIP_VEGA10)
|
||||
adev->doorbell.num_doorbells += 0x400;
|
||||
|
||||
adev->doorbell.ptr = ioremap(adev->doorbell.base,
|
||||
adev->doorbell.num_doorbells *
|
||||
sizeof(u32));
|
||||
|
@ -1656,7 +1670,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
|||
|
||||
/* right after GMC hw init, we create CSA */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_allocate_static_csa(adev);
|
||||
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_CSA_SIZE);
|
||||
if (r) {
|
||||
DRM_ERROR("allocate CSA failed %d\n", r);
|
||||
return r;
|
||||
|
@ -1681,7 +1697,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
amdgpu_xgmi_add_device(adev);
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
amdgpu_xgmi_add_device(adev);
|
||||
amdgpu_amdkfd_device_init(adev);
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
|
@ -1848,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
|
|||
{
|
||||
int i, r;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
amdgpu_xgmi_remove_device(adev);
|
||||
|
||||
amdgpu_amdkfd_device_fini(adev);
|
||||
|
||||
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
|
||||
|
@ -1890,7 +1910,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
|
|||
|
||||
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
|
||||
amdgpu_ucode_free_bo(adev);
|
||||
amdgpu_free_static_csa(adev);
|
||||
amdgpu_free_static_csa(&adev->virt.csa_obj);
|
||||
amdgpu_device_wb_fini(adev);
|
||||
amdgpu_device_vram_scratch_fini(adev);
|
||||
}
|
||||
|
@ -2337,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
|
|||
return amdgpu_device_asic_has_dc_support(adev->asic_type);
|
||||
}
|
||||
|
||||
|
||||
static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
|
||||
{
|
||||
struct amdgpu_device *adev =
|
||||
container_of(__work, struct amdgpu_device, xgmi_reset_work);
|
||||
|
||||
adev->asic_reset_res = amdgpu_asic_reset(adev);
|
||||
if (adev->asic_reset_res)
|
||||
DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
|
||||
adev->asic_reset_res, adev->ddev->unique);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_device_init - initialize the driver
|
||||
*
|
||||
|
@ -2435,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
|
||||
amdgpu_device_delay_enable_gfx_off);
|
||||
|
||||
INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
|
||||
|
||||
adev->gfx.gfx_off_req_count = 1;
|
||||
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
|
||||
|
||||
|
@ -2455,9 +2490,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
|
||||
DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
|
||||
|
||||
/* doorbell bar mapping */
|
||||
amdgpu_device_doorbell_init(adev);
|
||||
|
||||
/* io port mapping */
|
||||
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
|
||||
if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
|
||||
|
@ -2476,6 +2508,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
|
|||
if (r)
|
||||
return r;
|
||||
|
||||
/* doorbell bar mapping and doorbell index init*/
|
||||
amdgpu_device_doorbell_init(adev);
|
||||
|
||||
/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
|
||||
/* this will fail for cards that aren't VGA class devices, just
|
||||
* ignore it */
|
||||
|
@ -3148,86 +3183,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
*
|
||||
* attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* return 0 means succeeded otherwise failed
|
||||
*/
|
||||
static int amdgpu_device_reset(struct amdgpu_device *adev)
|
||||
{
|
||||
bool need_full_reset, vram_lost = 0;
|
||||
int r;
|
||||
|
||||
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
|
||||
|
||||
if (!need_full_reset) {
|
||||
amdgpu_device_ip_pre_soft_reset(adev);
|
||||
r = amdgpu_device_ip_soft_reset(adev);
|
||||
amdgpu_device_ip_post_soft_reset(adev);
|
||||
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
|
||||
DRM_INFO("soft reset failed, will fallback to full reset!\n");
|
||||
need_full_reset = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_full_reset) {
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
|
||||
retry:
|
||||
r = amdgpu_asic_reset(adev);
|
||||
/* post card */
|
||||
amdgpu_atom_asic_init(adev->mode_info.atom_context);
|
||||
|
||||
if (!r) {
|
||||
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
|
||||
r = amdgpu_device_ip_resume_phase1(adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
vram_lost = amdgpu_device_check_vram_lost(adev);
|
||||
if (vram_lost) {
|
||||
DRM_ERROR("VRAM is lost!\n");
|
||||
atomic_inc(&adev->vram_lost_counter);
|
||||
}
|
||||
|
||||
r = amdgpu_gtt_mgr_recover(
|
||||
&adev->mman.bdev.man[TTM_PL_TT]);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_device_fw_loading(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_device_ip_resume_phase2(adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (vram_lost)
|
||||
amdgpu_device_fill_reset_magic(adev);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (!r) {
|
||||
amdgpu_irq_gpu_reset_resume_helper(adev);
|
||||
r = amdgpu_ib_ring_tests(adev);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
need_full_reset = true;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
if (!r)
|
||||
r = amdgpu_device_recover_vram(adev);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
|
||||
|
@ -3295,40 +3250,46 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
|
||||
!amdgpu_sriov_vf(adev))) {
|
||||
DRM_INFO("GPU recovery disabled.\n");
|
||||
return false;
|
||||
if (amdgpu_gpu_recovery == 0)
|
||||
goto disabled;
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
return true;
|
||||
|
||||
if (amdgpu_gpu_recovery == -1) {
|
||||
switch (adev->asic_type) {
|
||||
case CHIP_BONAIRE:
|
||||
case CHIP_HAWAII:
|
||||
case CHIP_TOPAZ:
|
||||
case CHIP_TONGA:
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
case CHIP_VEGA20:
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_VEGA12:
|
||||
break;
|
||||
default:
|
||||
goto disabled;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
disabled:
|
||||
DRM_INFO("GPU recovery disabled.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @job: which job trigger hang
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
|
||||
static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job,
|
||||
bool *need_full_reset_arg)
|
||||
{
|
||||
int i, r, resched;
|
||||
|
||||
dev_info(adev->dev, "GPU reset begin!\n");
|
||||
|
||||
mutex_lock(&adev->lock_reset);
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
adev->in_gpu_reset = 1;
|
||||
|
||||
/* Block kfd */
|
||||
amdgpu_amdkfd_pre_reset(adev);
|
||||
|
||||
/* block TTM */
|
||||
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
|
||||
int i, r = 0;
|
||||
bool need_full_reset = *need_full_reset_arg;
|
||||
|
||||
/* block all schedulers and reset given job's ring */
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
|
@ -3348,10 +3309,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
|||
amdgpu_fence_driver_force_completion(ring);
|
||||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev))
|
||||
r = amdgpu_device_reset_sriov(adev, job ? false : true);
|
||||
else
|
||||
r = amdgpu_device_reset(adev);
|
||||
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
|
||||
if (!need_full_reset)
|
||||
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
|
||||
|
||||
if (!need_full_reset) {
|
||||
amdgpu_device_ip_pre_soft_reset(adev);
|
||||
r = amdgpu_device_ip_soft_reset(adev);
|
||||
amdgpu_device_ip_post_soft_reset(adev);
|
||||
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
|
||||
DRM_INFO("soft reset failed, will fallback to full reset!\n");
|
||||
need_full_reset = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_full_reset)
|
||||
r = amdgpu_device_ip_suspend(adev);
|
||||
|
||||
*need_full_reset_arg = need_full_reset;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
|
||||
struct list_head *device_list_handle,
|
||||
bool *need_full_reset_arg)
|
||||
{
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
|
||||
int r = 0;
|
||||
|
||||
/*
|
||||
* ASIC reset has to be done on all HGMI hive nodes ASAP
|
||||
* to allow proper links negotiation in FW (within 1 sec)
|
||||
*/
|
||||
if (need_full_reset) {
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
/* For XGMI run all resets in parallel to speed up the process */
|
||||
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
|
||||
r = -EALREADY;
|
||||
} else
|
||||
r = amdgpu_asic_reset(tmp_adev);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
|
||||
r, tmp_adev->ddev->unique);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* For XGMI wait for all PSP resets to complete before proceed */
|
||||
if (!r) {
|
||||
list_for_each_entry(tmp_adev, device_list_handle,
|
||||
gmc.xgmi.head) {
|
||||
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
flush_work(&tmp_adev->xgmi_reset_work);
|
||||
r = tmp_adev->asic_reset_res;
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
if (need_full_reset) {
|
||||
/* post card */
|
||||
if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
|
||||
DRM_WARN("asic atom init failed!");
|
||||
|
||||
if (!r) {
|
||||
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
|
||||
r = amdgpu_device_ip_resume_phase1(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
|
||||
if (vram_lost) {
|
||||
DRM_ERROR("VRAM is lost!\n");
|
||||
atomic_inc(&tmp_adev->vram_lost_counter);
|
||||
}
|
||||
|
||||
r = amdgpu_gtt_mgr_recover(
|
||||
&tmp_adev->mman.bdev.man[TTM_PL_TT]);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
r = amdgpu_device_fw_loading(tmp_adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = amdgpu_device_ip_resume_phase2(tmp_adev);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (vram_lost)
|
||||
amdgpu_device_fill_reset_magic(tmp_adev);
|
||||
|
||||
/* Update PSP FW topology after reset */
|
||||
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
out:
|
||||
if (!r) {
|
||||
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
|
||||
r = amdgpu_ib_ring_tests(tmp_adev);
|
||||
if (r) {
|
||||
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
|
||||
r = amdgpu_device_ip_suspend(tmp_adev);
|
||||
need_full_reset = true;
|
||||
r = -EAGAIN;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
|
||||
if (!r)
|
||||
r = amdgpu_device_recover_vram(tmp_adev);
|
||||
else
|
||||
tmp_adev->asic_reset_res = r;
|
||||
}
|
||||
|
||||
end:
|
||||
*need_full_reset_arg = need_full_reset;
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
@ -3363,7 +3458,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
|||
* or all rings (in the case @job is NULL)
|
||||
* after above amdgpu_reset accomplished
|
||||
*/
|
||||
if ((!job || job->base.sched == &ring->sched) && !r)
|
||||
if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
|
||||
drm_sched_job_recovery(&ring->sched);
|
||||
|
||||
kthread_unpark(ring->sched.thread);
|
||||
|
@ -3373,21 +3468,142 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
|||
drm_helper_resume_force_mode(adev->ddev);
|
||||
}
|
||||
|
||||
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
|
||||
adev->asic_reset_res = 0;
|
||||
}
|
||||
|
||||
if (r) {
|
||||
/* bad news, how to tell it to userspace ? */
|
||||
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
|
||||
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
|
||||
} else {
|
||||
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
|
||||
}
|
||||
static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
|
||||
{
|
||||
mutex_lock(&adev->lock_reset);
|
||||
atomic_inc(&adev->gpu_reset_counter);
|
||||
adev->in_gpu_reset = 1;
|
||||
/* Block kfd */
|
||||
amdgpu_amdkfd_pre_reset(adev);
|
||||
}
|
||||
|
||||
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
|
||||
{
|
||||
/*unlock kfd */
|
||||
amdgpu_amdkfd_post_reset(adev);
|
||||
amdgpu_vf_error_trans_all(adev);
|
||||
adev->in_gpu_reset = 0;
|
||||
mutex_unlock(&adev->lock_reset);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @job: which job trigger hang
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* Attempt to do soft-reset or full-reset and reinitialize Asic
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
|
||||
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
struct amdgpu_job *job)
|
||||
{
|
||||
int r;
|
||||
struct amdgpu_hive_info *hive = NULL;
|
||||
bool need_full_reset = false;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
struct list_head device_list, *device_list_handle = NULL;
|
||||
|
||||
INIT_LIST_HEAD(&device_list);
|
||||
|
||||
dev_info(adev->dev, "GPU reset begin!\n");
|
||||
|
||||
/*
|
||||
* In case of XGMI hive disallow concurrent resets to be triggered
|
||||
* by different nodes. No point also since the one node already executing
|
||||
* reset will also reset all the other nodes in the hive.
|
||||
*/
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
!mutex_trylock(&hive->hive_lock))
|
||||
return 0;
|
||||
|
||||
/* Start with adev pre asic reset first for soft reset check.*/
|
||||
amdgpu_device_lock_adev(adev);
|
||||
r = amdgpu_device_pre_asic_reset(adev,
|
||||
job,
|
||||
&need_full_reset);
|
||||
if (r) {
|
||||
/*TODO Should we stop ?*/
|
||||
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
|
||||
r, adev->ddev->unique);
|
||||
adev->asic_reset_res = r;
|
||||
}
|
||||
|
||||
/* Build list of devices to reset */
|
||||
if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
if (!hive) {
|
||||
amdgpu_device_unlock_adev(adev);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* In case we are in XGMI hive mode device reset is done for all the
|
||||
* nodes in the hive to retrain all XGMI links and hence the reset
|
||||
* sequence is executed in loop on all nodes.
|
||||
*/
|
||||
device_list_handle = &hive->device_list;
|
||||
} else {
|
||||
list_add_tail(&adev->gmc.xgmi.head, &device_list);
|
||||
device_list_handle = &device_list;
|
||||
}
|
||||
|
||||
retry: /* Rest of adevs pre asic reset from XGMI hive. */
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
|
||||
if (tmp_adev == adev)
|
||||
continue;
|
||||
|
||||
amdgpu_device_lock_adev(tmp_adev);
|
||||
r = amdgpu_device_pre_asic_reset(tmp_adev,
|
||||
NULL,
|
||||
&need_full_reset);
|
||||
/*TODO Should we stop ?*/
|
||||
if (r) {
|
||||
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
|
||||
r, tmp_adev->ddev->unique);
|
||||
tmp_adev->asic_reset_res = r;
|
||||
}
|
||||
}
|
||||
|
||||
/* Actual ASIC resets if needed.*/
|
||||
/* TODO Implement XGMI hive reset logic for SRIOV */
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_device_reset_sriov(adev, job ? false : true);
|
||||
if (r)
|
||||
adev->asic_reset_res = r;
|
||||
} else {
|
||||
r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
|
||||
if (r && r == -EAGAIN)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* Post ASIC reset for all devs .*/
|
||||
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
|
||||
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
|
||||
|
||||
if (r) {
|
||||
/* bad news, how to tell it to userspace ? */
|
||||
dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
|
||||
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
|
||||
} else {
|
||||
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
|
||||
}
|
||||
|
||||
amdgpu_device_unlock_adev(tmp_adev);
|
||||
}
|
||||
|
||||
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
|
||||
mutex_unlock(&hive->hive_lock);
|
||||
|
||||
if (r)
|
||||
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
|
|
@ -631,6 +631,11 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
|
|||
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
|
||||
if (!adev->mode_info.max_bpc_property)
|
||||
return -ENOMEM;
|
||||
adev->mode_info.abm_level_property =
|
||||
drm_property_create_range(adev->ddev, 0,
|
||||
"abm level", 0, 4);
|
||||
if (!adev->mode_info.abm_level_property)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -857,7 +862,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
|
|||
/* Inside "upper part" of vblank area? Apply corrective offset if so: */
|
||||
if (in_vbl && (*vpos >= vbl_start)) {
|
||||
vtotal = mode->crtc_vtotal;
|
||||
*vpos = *vpos - vtotal;
|
||||
|
||||
/* With variable refresh rate displays the vpos can exceed
|
||||
* the vtotal value. Clamp to 0 to return -vbl_end instead
|
||||
* of guessing the remaining number of lines until scanout.
|
||||
*/
|
||||
*vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0;
|
||||
}
|
||||
|
||||
/* Correct for shifted end of vbl at vbl_end. */
|
||||
|
|
|
@ -0,0 +1,243 @@
|
|||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* GPU doorbell structures, functions & helpers
|
||||
*/
|
||||
struct amdgpu_doorbell {
|
||||
/* doorbell mmio */
|
||||
resource_size_t base;
|
||||
resource_size_t size;
|
||||
u32 __iomem *ptr;
|
||||
u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */
|
||||
};
|
||||
|
||||
/* Reserved doorbells for amdgpu (including multimedia).
|
||||
* KFD can use all the rest in the 2M doorbell bar.
|
||||
* For asic before vega10, doorbell is 32-bit, so the
|
||||
* index/offset is in dword. For vega10 and after, doorbell
|
||||
* can be 64-bit, so the index defined is in qword.
|
||||
*/
|
||||
struct amdgpu_doorbell_index {
|
||||
uint32_t kiq;
|
||||
uint32_t mec_ring0;
|
||||
uint32_t mec_ring1;
|
||||
uint32_t mec_ring2;
|
||||
uint32_t mec_ring3;
|
||||
uint32_t mec_ring4;
|
||||
uint32_t mec_ring5;
|
||||
uint32_t mec_ring6;
|
||||
uint32_t mec_ring7;
|
||||
uint32_t userqueue_start;
|
||||
uint32_t userqueue_end;
|
||||
uint32_t gfx_ring0;
|
||||
uint32_t sdma_engine0;
|
||||
uint32_t sdma_engine1;
|
||||
uint32_t sdma_engine2;
|
||||
uint32_t sdma_engine3;
|
||||
uint32_t sdma_engine4;
|
||||
uint32_t sdma_engine5;
|
||||
uint32_t sdma_engine6;
|
||||
uint32_t sdma_engine7;
|
||||
uint32_t ih;
|
||||
union {
|
||||
struct {
|
||||
uint32_t vcn_ring0_1;
|
||||
uint32_t vcn_ring2_3;
|
||||
uint32_t vcn_ring4_5;
|
||||
uint32_t vcn_ring6_7;
|
||||
} vcn;
|
||||
struct {
|
||||
uint32_t uvd_ring0_1;
|
||||
uint32_t uvd_ring2_3;
|
||||
uint32_t uvd_ring4_5;
|
||||
uint32_t uvd_ring6_7;
|
||||
uint32_t vce_ring0_1;
|
||||
uint32_t vce_ring2_3;
|
||||
uint32_t vce_ring4_5;
|
||||
uint32_t vce_ring6_7;
|
||||
} uvd_vce;
|
||||
};
|
||||
uint32_t max_assignment;
|
||||
};
|
||||
|
||||
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
|
||||
{
|
||||
AMDGPU_DOORBELL_KIQ = 0x000,
|
||||
AMDGPU_DOORBELL_HIQ = 0x001,
|
||||
AMDGPU_DOORBELL_DIQ = 0x002,
|
||||
AMDGPU_DOORBELL_MEC_RING0 = 0x010,
|
||||
AMDGPU_DOORBELL_MEC_RING1 = 0x011,
|
||||
AMDGPU_DOORBELL_MEC_RING2 = 0x012,
|
||||
AMDGPU_DOORBELL_MEC_RING3 = 0x013,
|
||||
AMDGPU_DOORBELL_MEC_RING4 = 0x014,
|
||||
AMDGPU_DOORBELL_MEC_RING5 = 0x015,
|
||||
AMDGPU_DOORBELL_MEC_RING6 = 0x016,
|
||||
AMDGPU_DOORBELL_MEC_RING7 = 0x017,
|
||||
AMDGPU_DOORBELL_GFX_RING0 = 0x020,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0,
|
||||
AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1,
|
||||
AMDGPU_DOORBELL_IH = 0x1E8,
|
||||
AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF,
|
||||
AMDGPU_DOORBELL_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL_ASSIGNMENT;
|
||||
|
||||
typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
|
||||
{
|
||||
/* Compute + GFX: 0~255 */
|
||||
AMDGPU_VEGA20_DOORBELL_KIQ = 0x000,
|
||||
AMDGPU_VEGA20_DOORBELL_HIQ = 0x001,
|
||||
AMDGPU_VEGA20_DOORBELL_DIQ = 0x002,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009,
|
||||
AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A,
|
||||
AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B,
|
||||
AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A,
|
||||
AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B,
|
||||
/* SDMA:256~335*/
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C,
|
||||
AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146,
|
||||
/* IH: 376~391 */
|
||||
AMDGPU_VEGA20_DOORBELL_IH = 0x178,
|
||||
/* MMSCH: 392~407
|
||||
* overlap the doorbell assignment with VCN as they are mutually exclusive
|
||||
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
|
||||
*/
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188,
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189,
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A,
|
||||
AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B,
|
||||
|
||||
AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
|
||||
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
|
||||
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
|
||||
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
|
||||
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
|
||||
|
||||
/*
|
||||
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
|
||||
*/
|
||||
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
|
||||
{
|
||||
/*
|
||||
* All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in
|
||||
* a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range.
|
||||
* Compute related doorbells are allocated from 0x00 to 0x8a
|
||||
*/
|
||||
|
||||
|
||||
/* kernel scheduling */
|
||||
AMDGPU_DOORBELL64_KIQ = 0x00,
|
||||
|
||||
/* HSA interface queue and debug queue */
|
||||
AMDGPU_DOORBELL64_HIQ = 0x01,
|
||||
AMDGPU_DOORBELL64_DIQ = 0x02,
|
||||
|
||||
/* Compute engines */
|
||||
AMDGPU_DOORBELL64_MEC_RING0 = 0x03,
|
||||
AMDGPU_DOORBELL64_MEC_RING1 = 0x04,
|
||||
AMDGPU_DOORBELL64_MEC_RING2 = 0x05,
|
||||
AMDGPU_DOORBELL64_MEC_RING3 = 0x06,
|
||||
AMDGPU_DOORBELL64_MEC_RING4 = 0x07,
|
||||
AMDGPU_DOORBELL64_MEC_RING5 = 0x08,
|
||||
AMDGPU_DOORBELL64_MEC_RING6 = 0x09,
|
||||
AMDGPU_DOORBELL64_MEC_RING7 = 0x0a,
|
||||
|
||||
/* User queue doorbell range (128 doorbells) */
|
||||
AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b,
|
||||
AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a,
|
||||
|
||||
/* Graphics engine */
|
||||
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
|
||||
|
||||
/*
|
||||
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
|
||||
* Graphics voltage island aperture 1
|
||||
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
|
||||
*/
|
||||
|
||||
/* For vega10 sriov, the sdma doorbell must be fixed as follow
|
||||
* to keep the same setting with host driver, or it will
|
||||
* happen conflicts
|
||||
*/
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
|
||||
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
|
||||
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
|
||||
|
||||
/* Interrupt handler */
|
||||
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
|
||||
AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */
|
||||
AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */
|
||||
|
||||
/* VCN engine use 32 bits doorbell */
|
||||
AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
|
||||
AMDGPU_DOORBELL64_VCN2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_VCN4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_VCN6_7 = 0xFB,
|
||||
|
||||
/* overlap the doorbell assignment with VCN as they are mutually exclusive
|
||||
* VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
|
||||
*/
|
||||
AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8,
|
||||
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9,
|
||||
AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA,
|
||||
AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB,
|
||||
|
||||
AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC,
|
||||
AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD,
|
||||
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
|
||||
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
|
||||
|
||||
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
|
||||
AMDGPU_DOORBELL64_INVALID = 0xFFFF
|
||||
} AMDGPU_DOORBELL64_ASSIGNMENT;
|
||||
|
||||
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
|
||||
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
|
||||
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
|
||||
|
||||
#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
|
||||
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
|
||||
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
|
||||
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
|
||||
|
|
@ -454,9 +454,10 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
|
|||
|
||||
/**
|
||||
* DOC: param_buf_per_se (int)
|
||||
* Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx).
|
||||
* Override the size of Off-Chip Parameter Cache per Shader Engine in Byte.
|
||||
* The default is 0 (depending on gfx).
|
||||
*/
|
||||
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
|
||||
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
|
||||
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
|
||||
|
||||
/**
|
||||
|
@ -1227,9 +1228,6 @@ static struct drm_driver kms_driver = {
|
|||
.patchlevel = KMS_DRIVER_PATCHLEVEL,
|
||||
};
|
||||
|
||||
static struct drm_driver *driver;
|
||||
static struct pci_driver *pdriver;
|
||||
|
||||
static struct pci_driver amdgpu_kms_pci_driver = {
|
||||
.name = DRIVER_NAME,
|
||||
.id_table = pciidlist,
|
||||
|
@ -1259,16 +1257,14 @@ static int __init amdgpu_init(void)
|
|||
goto error_fence;
|
||||
|
||||
DRM_INFO("amdgpu kernel modesetting enabled.\n");
|
||||
driver = &kms_driver;
|
||||
pdriver = &amdgpu_kms_pci_driver;
|
||||
driver->num_ioctls = amdgpu_max_kms_ioctl;
|
||||
kms_driver.num_ioctls = amdgpu_max_kms_ioctl;
|
||||
amdgpu_register_atpx_handler();
|
||||
|
||||
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
|
||||
amdgpu_amdkfd_init();
|
||||
|
||||
/* let modprobe override vga console setting */
|
||||
return pci_register_driver(pdriver);
|
||||
return pci_register_driver(&amdgpu_kms_pci_driver);
|
||||
|
||||
error_fence:
|
||||
amdgpu_sync_fini();
|
||||
|
@ -1280,7 +1276,7 @@ static int __init amdgpu_init(void)
|
|||
static void __exit amdgpu_exit(void)
|
||||
{
|
||||
amdgpu_amdkfd_fini();
|
||||
pci_unregister_driver(pdriver);
|
||||
pci_unregister_driver(&amdgpu_kms_pci_driver);
|
||||
amdgpu_unregister_atpx_handler();
|
||||
amdgpu_sync_fini();
|
||||
amdgpu_fence_slab_fini();
|
||||
|
|
|
@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
|
|||
ring->fence_drv.irq_type = irq_type;
|
||||
ring->fence_drv.initialized = true;
|
||||
|
||||
dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
|
||||
"cpu addr 0x%p\n", ring->idx,
|
||||
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
|
||||
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr "
|
||||
"0x%016llx, cpu addr 0x%p\n", ring->name,
|
||||
ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
|||
}
|
||||
mb();
|
||||
amdgpu_asic_flush_hdp(adev, NULL);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
|
|||
* @offset: offset into the GPU's gart aperture
|
||||
* @pages: number of pages to bind
|
||||
* @dma_addr: DMA addresses of pages
|
||||
* @flags: page table entry flags
|
||||
* @dst: CPU address of the gart table
|
||||
*
|
||||
* Map the dma_addresses into GART entries (all asics).
|
||||
* Returns 0 for success, -EINVAL for failure.
|
||||
|
@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
|
|||
|
||||
mb();
|
||||
amdgpu_asic_flush_hdp(adev, NULL);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0);
|
||||
amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ struct amdgpu_bo;
|
|||
|
||||
struct amdgpu_gart {
|
||||
struct amdgpu_bo *bo;
|
||||
/* CPU kmapped address of gart table */
|
||||
void *ptr;
|
||||
unsigned num_gpu_pages;
|
||||
unsigned num_cpu_pages;
|
||||
|
|
|
@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
|
|||
INIT_LIST_HEAD(&duplicates);
|
||||
|
||||
tv.bo = &bo->tbo;
|
||||
tv.shared = true;
|
||||
tv.num_shared = 1;
|
||||
list_add(&tv.head, &list);
|
||||
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
|
||||
|
@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
|
|||
return -ENOENT;
|
||||
abo = gem_to_amdgpu_bo(gobj);
|
||||
tv.bo = &abo->tbo;
|
||||
tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
|
||||
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
|
||||
tv.num_shared = 1;
|
||||
else
|
||||
tv.num_shared = 0;
|
||||
list_add(&tv.head, &list);
|
||||
} else {
|
||||
gobj = NULL;
|
||||
|
|
|
@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
|
|||
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
|
||||
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
|
||||
|
||||
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
/*
|
||||
* GEM objects.
|
||||
*/
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <drm/drmP.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/* delay 0.1 second to enable gfx off feature */
|
||||
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
|
||||
|
@ -249,7 +250,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
|
|||
ring->adev = NULL;
|
||||
ring->ring_obj = NULL;
|
||||
ring->use_doorbell = true;
|
||||
ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
|
||||
ring->doorbell_index = adev->doorbell_index.kiq;
|
||||
|
||||
r = amdgpu_gfx_kiq_acquire(adev, ring);
|
||||
if (r)
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
*/
|
||||
#include "clearstate_defs.h"
|
||||
#include "amdgpu_ring.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/* GFX current status */
|
||||
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
|
||||
|
@ -37,59 +38,6 @@
|
|||
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
|
||||
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
|
||||
|
||||
|
||||
struct amdgpu_rlc_funcs {
|
||||
void (*enter_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*exit_safe_mode)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_rlc {
|
||||
/* for power gating */
|
||||
struct amdgpu_bo *save_restore_obj;
|
||||
uint64_t save_restore_gpu_addr;
|
||||
volatile uint32_t *sr_ptr;
|
||||
const u32 *reg_list;
|
||||
u32 reg_list_size;
|
||||
/* for clear state */
|
||||
struct amdgpu_bo *clear_state_obj;
|
||||
uint64_t clear_state_gpu_addr;
|
||||
volatile uint32_t *cs_ptr;
|
||||
const struct cs_section_def *cs_data;
|
||||
u32 clear_state_size;
|
||||
/* for cp tables */
|
||||
struct amdgpu_bo *cp_table_obj;
|
||||
uint64_t cp_table_gpu_addr;
|
||||
volatile uint32_t *cp_table_ptr;
|
||||
u32 cp_table_size;
|
||||
|
||||
/* safe mode for updating CG/PG state */
|
||||
bool in_safe_mode;
|
||||
const struct amdgpu_rlc_funcs *funcs;
|
||||
|
||||
/* for firmware data */
|
||||
u32 save_and_restore_offset;
|
||||
u32 clear_state_descriptor_offset;
|
||||
u32 avail_scratch_ram_locations;
|
||||
u32 reg_restore_list_size;
|
||||
u32 reg_list_format_start;
|
||||
u32 reg_list_format_separate_start;
|
||||
u32 starting_offsets_start;
|
||||
u32 reg_list_format_size_bytes;
|
||||
u32 reg_list_size_bytes;
|
||||
u32 reg_list_format_direct_reg_list_length;
|
||||
u32 save_restore_list_cntl_size_bytes;
|
||||
u32 save_restore_list_gpm_size_bytes;
|
||||
u32 save_restore_list_srm_size_bytes;
|
||||
|
||||
u32 *register_list_format;
|
||||
u32 *register_restore;
|
||||
u8 *save_restore_list_cntl;
|
||||
u8 *save_restore_list_gpm;
|
||||
u8 *save_restore_list_srm;
|
||||
|
||||
bool is_rlc_v2_1;
|
||||
};
|
||||
|
||||
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
|
||||
|
||||
struct amdgpu_mec {
|
||||
|
|
|
@ -64,7 +64,7 @@ struct amdgpu_vmhub {
|
|||
struct amdgpu_gmc_funcs {
|
||||
/* flush the vm tlb via mmio */
|
||||
void (*flush_gpu_tlb)(struct amdgpu_device *adev,
|
||||
uint32_t vmid);
|
||||
uint32_t vmid, uint32_t flush_type);
|
||||
/* flush the vm tlb via ring */
|
||||
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
|
||||
uint64_t pd_addr);
|
||||
|
@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs {
|
|||
|
||||
struct amdgpu_xgmi {
|
||||
/* from psp */
|
||||
u64 device_id;
|
||||
u64 node_id;
|
||||
u64 hive_id;
|
||||
/* fixed per family */
|
||||
u64 node_segment_size;
|
||||
|
@ -99,6 +99,7 @@ struct amdgpu_xgmi {
|
|||
unsigned num_physical_nodes;
|
||||
/* gpu list in the same hive */
|
||||
struct list_head head;
|
||||
bool supported;
|
||||
};
|
||||
|
||||
struct amdgpu_gmc {
|
||||
|
@ -151,7 +152,7 @@ struct amdgpu_gmc {
|
|||
struct amdgpu_xgmi xgmi;
|
||||
};
|
||||
|
||||
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
|
||||
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
|
||||
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
|
||||
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
|
||||
#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
|
||||
|
|
|
@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
fence_ctx = 0;
|
||||
}
|
||||
|
||||
if (!ring->ready) {
|
||||
if (!ring->sched.ready) {
|
||||
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
|
||||
continue;
|
||||
|
||||
amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0,
|
||||
need_ctx_switch);
|
||||
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
|
||||
need_ctx_switch = false;
|
||||
}
|
||||
|
||||
|
@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
|
|||
tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
|
||||
}
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
for (i = 0; i < adev->num_rings; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
long tmo;
|
||||
|
||||
if (!ring || !ring->ready)
|
||||
continue;
|
||||
|
||||
/* skip IB tests for KIQ in general for the below reasons:
|
||||
* 1. We never submit IBs to the KIQ
|
||||
* 2. KIQ doesn't use the EOP interrupts,
|
||||
* we use some other CP interrupt.
|
||||
/* KIQ rings don't have an IB test because we never submit IBs
|
||||
* to them and they have no interrupt support.
|
||||
*/
|
||||
if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
|
||||
if (!ring->sched.ready || !ring->funcs->test_ib)
|
||||
continue;
|
||||
|
||||
/* MM engine need more time */
|
||||
|
@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
|
|||
tmo = tmo_gfx;
|
||||
|
||||
r = amdgpu_ring_test_ib(ring, tmo);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
if (!r) {
|
||||
DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n",
|
||||
ring->name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ring == &adev->gfx.gfx_ring[0]) {
|
||||
/* oh, oh, that's really bad */
|
||||
DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r);
|
||||
adev->accel_working = false;
|
||||
return r;
|
||||
ring->sched.ready = false;
|
||||
DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n",
|
||||
ring->name, r);
|
||||
|
||||
} else {
|
||||
/* still not good, but we can live with it */
|
||||
DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
|
||||
ret = r;
|
||||
}
|
||||
if (ring == &adev->gfx.gfx_ring[0]) {
|
||||
/* oh, oh, that's really bad */
|
||||
adev->accel_working = false;
|
||||
return r;
|
||||
|
||||
} else {
|
||||
ret = r;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
|
|
@ -51,14 +51,12 @@ struct amdgpu_ih_ring {
|
|||
struct amdgpu_ih_funcs {
|
||||
/* ring read/write ptr handling, called from interrupt context */
|
||||
u32 (*get_wptr)(struct amdgpu_device *adev);
|
||||
bool (*prescreen_iv)(struct amdgpu_device *adev);
|
||||
void (*decode_iv)(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry);
|
||||
void (*set_rptr)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
|
||||
#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
|
||||
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
|
||||
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
|
||||
|
||||
|
|
|
@ -93,23 +93,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
|
|||
drm_helper_hpd_irq_event(dev);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_reset_work_func - execute GPU reset
|
||||
*
|
||||
* @work: work struct pointer
|
||||
*
|
||||
* Execute scheduled GPU reset (Cayman+).
|
||||
* This function is called when the IRQ handler thinks we need a GPU reset.
|
||||
*/
|
||||
static void amdgpu_irq_reset_work_func(struct work_struct *work)
|
||||
{
|
||||
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
|
||||
reset_work);
|
||||
|
||||
if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
|
||||
amdgpu_device_gpu_recover(adev, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_irq_disable_all - disable *all* interrupts
|
||||
*
|
||||
|
@ -162,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev,
|
|||
u32 ring_index = ih->rptr >> 2;
|
||||
struct amdgpu_iv_entry entry;
|
||||
|
||||
/* Prescreening of high-frequency interrupts */
|
||||
if (!amdgpu_ih_prescreen_iv(adev))
|
||||
return;
|
||||
|
||||
/* Before dispatching irq to IP blocks, send it to amdkfd */
|
||||
amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
|
||||
|
||||
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
|
||||
amdgpu_ih_decode_iv(adev, &entry);
|
||||
|
||||
|
@ -262,15 +238,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
|
|||
amdgpu_hotplug_work_func);
|
||||
}
|
||||
|
||||
INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
|
||||
|
||||
adev->irq.installed = true;
|
||||
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
|
||||
if (r) {
|
||||
adev->irq.installed = false;
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
flush_work(&adev->hotplug_work);
|
||||
cancel_work_sync(&adev->reset_work);
|
||||
return r;
|
||||
}
|
||||
adev->ddev->max_vblank_count = 0x00ffffff;
|
||||
|
@ -299,7 +272,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
|
|||
pci_disable_msi(adev->pdev);
|
||||
if (!amdgpu_device_has_dc_support(adev))
|
||||
flush_work(&adev->hotplug_work);
|
||||
cancel_work_sync(&adev->reset_work);
|
||||
}
|
||||
|
||||
for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
|
||||
|
@ -392,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
|
|||
unsigned client_id = entry->client_id;
|
||||
unsigned src_id = entry->src_id;
|
||||
struct amdgpu_irq_src *src;
|
||||
bool handled = false;
|
||||
int r;
|
||||
|
||||
trace_amdgpu_iv(entry);
|
||||
|
||||
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
|
||||
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
|
||||
} else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
|
||||
DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (adev->irq.virq[src_id]) {
|
||||
} else if (adev->irq.virq[src_id]) {
|
||||
generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
|
||||
} else {
|
||||
if (!adev->irq.client[client_id].sources) {
|
||||
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
|
||||
client_id, src_id);
|
||||
return;
|
||||
}
|
||||
|
||||
src = adev->irq.client[client_id].sources[src_id];
|
||||
if (!src) {
|
||||
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
|
||||
return;
|
||||
}
|
||||
} else if (!adev->irq.client[client_id].sources) {
|
||||
DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
|
||||
client_id, src_id);
|
||||
|
||||
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
|
||||
r = src->funcs->process(adev, src, entry);
|
||||
if (r)
|
||||
if (r < 0)
|
||||
DRM_ERROR("error processing interrupt (%d)\n", r);
|
||||
else if (r)
|
||||
handled = true;
|
||||
|
||||
} else {
|
||||
DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
|
||||
}
|
||||
|
||||
/* Send it to amdkfd as well if it isn't already handled */
|
||||
if (!handled)
|
||||
amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
|
|||
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
|
||||
struct amdgpu_job *job = to_amdgpu_job(s_job);
|
||||
|
||||
drm_sched_job_cleanup(s_job);
|
||||
|
||||
amdgpu_ring_priority_put(ring, s_job->s_priority);
|
||||
dma_fence_put(job->fence);
|
||||
amdgpu_sync_free(&job->sync);
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#define to_amdgpu_job(sched_job) \
|
||||
container_of((sched_job), struct amdgpu_job, base)
|
||||
|
||||
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
|
||||
|
||||
struct amdgpu_fence;
|
||||
|
||||
struct amdgpu_job {
|
||||
|
|
|
@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
case AMDGPU_HW_IP_GFX:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
if (adev->gfx.gfx_ring[i].ready)
|
||||
if (adev->gfx.gfx_ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
|
@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
case AMDGPU_HW_IP_COMPUTE:
|
||||
type = AMD_IP_BLOCK_TYPE_GFX;
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
if (adev->gfx.compute_ring[i].ready)
|
||||
if (adev->gfx.compute_ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 32;
|
||||
ib_size_alignment = 32;
|
||||
|
@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
case AMDGPU_HW_IP_DMA:
|
||||
type = AMD_IP_BLOCK_TYPE_SDMA;
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
if (adev->sdma.instance[i].ring.ready)
|
||||
if (adev->sdma.instance[i].ring.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 256;
|
||||
ib_size_alignment = 4;
|
||||
|
@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
if (adev->uvd.harvest_config & (1 << i))
|
||||
continue;
|
||||
|
||||
if (adev->uvd.inst[i].ring.ready)
|
||||
if (adev->uvd.inst[i].ring.sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 64;
|
||||
|
@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
case AMDGPU_HW_IP_VCE:
|
||||
type = AMD_IP_BLOCK_TYPE_VCE;
|
||||
for (i = 0; i < adev->vce.num_rings; i++)
|
||||
if (adev->vce.ring[i].ready)
|
||||
if (adev->vce.ring[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 4;
|
||||
ib_size_alignment = 1;
|
||||
|
@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
continue;
|
||||
|
||||
for (j = 0; j < adev->uvd.num_enc_rings; j++)
|
||||
if (adev->uvd.inst[i].ring_enc[j].ready)
|
||||
if (adev->uvd.inst[i].ring_enc[j].sched.ready)
|
||||
++num_rings;
|
||||
}
|
||||
ib_start_alignment = 64;
|
||||
|
@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
break;
|
||||
case AMDGPU_HW_IP_VCN_DEC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_dec.ready)
|
||||
if (adev->vcn.ring_dec.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
|
@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
|
|||
case AMDGPU_HW_IP_VCN_ENC:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
for (i = 0; i < adev->vcn.num_enc_rings; i++)
|
||||
if (adev->vcn.ring_enc[i].ready)
|
||||
if (adev->vcn.ring_enc[i].sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 64;
|
||||
ib_size_alignment = 1;
|
||||
break;
|
||||
case AMDGPU_HW_IP_VCN_JPEG:
|
||||
type = AMD_IP_BLOCK_TYPE_VCN;
|
||||
if (adev->vcn.ring_jpeg.ready)
|
||||
if (adev->vcn.ring_jpeg.sched.ready)
|
||||
++num_rings;
|
||||
ib_start_alignment = 16;
|
||||
ib_size_alignment = 16;
|
||||
|
@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
|
|||
}
|
||||
|
||||
if (amdgpu_sriov_vf(adev)) {
|
||||
r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
|
||||
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
|
||||
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
|
||||
&fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE);
|
||||
if (r)
|
||||
goto error_vm;
|
||||
}
|
||||
|
@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
|
|||
pasid = fpriv->vm.pasid;
|
||||
pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
|
||||
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
|
||||
amdgpu_vm_fini(adev, &fpriv->vm);
|
||||
|
||||
if (pasid)
|
||||
amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#include <drm/drm_crtc_helper.h>
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#include <drm/drm_plane_helper.h>
|
||||
#include <drm/drm_fb_helper.h>
|
||||
#include <linux/i2c.h>
|
||||
#include <linux/i2c-algo-bit.h>
|
||||
#include <linux/hrtimer.h>
|
||||
|
@ -57,7 +56,6 @@ struct amdgpu_hpd;
|
|||
#define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base)
|
||||
#define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base)
|
||||
#define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base)
|
||||
#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base)
|
||||
|
||||
#define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base);
|
||||
|
||||
|
@ -295,13 +293,6 @@ struct amdgpu_display_funcs {
|
|||
uint16_t connector_object_id,
|
||||
struct amdgpu_hpd *hpd,
|
||||
struct amdgpu_router *router);
|
||||
/* it is used to enter or exit into free sync mode */
|
||||
int (*notify_freesync)(struct drm_device *dev, void *data,
|
||||
struct drm_file *filp);
|
||||
/* it is used to allow enablement of freesync mode */
|
||||
int (*set_freesync_property)(struct drm_connector *connector,
|
||||
struct drm_property *property,
|
||||
uint64_t val);
|
||||
|
||||
|
||||
};
|
||||
|
@ -325,7 +316,7 @@ struct amdgpu_mode_info {
|
|||
struct card_info *atom_card_info;
|
||||
bool mode_config_initialized;
|
||||
struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
|
||||
struct amdgpu_plane *planes[AMDGPU_MAX_PLANES];
|
||||
struct drm_plane *planes[AMDGPU_MAX_PLANES];
|
||||
struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
|
||||
/* DVI-I properties */
|
||||
struct drm_property *coherent_mode_property;
|
||||
|
@ -341,6 +332,8 @@ struct amdgpu_mode_info {
|
|||
struct drm_property *dither_property;
|
||||
/* maximum number of bits per channel for monitor color */
|
||||
struct drm_property *max_bpc_property;
|
||||
/* Adaptive Backlight Modulation (power feature) */
|
||||
struct drm_property *abm_level_property;
|
||||
/* hardcoded DFP edid from BIOS */
|
||||
struct edid *bios_hardcoded_edid;
|
||||
int bios_hardcoded_edid_size;
|
||||
|
@ -436,11 +429,6 @@ struct amdgpu_crtc {
|
|||
struct drm_pending_vblank_event *event;
|
||||
};
|
||||
|
||||
struct amdgpu_plane {
|
||||
struct drm_plane base;
|
||||
enum drm_plane_type plane_type;
|
||||
};
|
||||
|
||||
struct amdgpu_encoder_atom_dig {
|
||||
bool linkb;
|
||||
/* atom dig */
|
||||
|
|
|
@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
|
|||
amdgpu_bo_subtract_pin_size(bo);
|
||||
|
||||
if (bo->kfd_bo)
|
||||
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
|
||||
amdgpu_amdkfd_unreserve_memory_limit(bo);
|
||||
|
||||
amdgpu_bo_kunmap(bo);
|
||||
|
||||
|
@ -607,53 +607,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
|
|||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
|
||||
* @adev: amdgpu device object
|
||||
* @ring: amdgpu_ring for the engine handling the buffer operations
|
||||
* @bo: &amdgpu_bo buffer to be backed up
|
||||
* @resv: reservation object with embedded fence
|
||||
* @fence: dma_fence associated with the operation
|
||||
* @direct: whether to submit the job directly
|
||||
*
|
||||
* Copies an &amdgpu_bo buffer object to its shadow object.
|
||||
* Not used for now.
|
||||
*
|
||||
* Returns:
|
||||
* 0 for success or a negative error code on failure.
|
||||
*/
|
||||
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_bo *bo,
|
||||
struct reservation_object *resv,
|
||||
struct dma_fence **fence,
|
||||
bool direct)
|
||||
|
||||
{
|
||||
struct amdgpu_bo *shadow = bo->shadow;
|
||||
uint64_t bo_addr, shadow_addr;
|
||||
int r;
|
||||
|
||||
if (!shadow)
|
||||
return -EINVAL;
|
||||
|
||||
bo_addr = amdgpu_bo_gpu_offset(bo);
|
||||
shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
|
||||
|
||||
r = reservation_object_reserve_shared(bo->tbo.resv);
|
||||
if (r)
|
||||
goto err;
|
||||
|
||||
r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
|
||||
amdgpu_bo_size(bo), resv, fence,
|
||||
direct, false);
|
||||
if (!r)
|
||||
amdgpu_bo_fence(bo, *fence, true);
|
||||
|
||||
err:
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_bo_validate - validate an &amdgpu_bo buffer object
|
||||
* @bo: pointer to the buffer object
|
||||
|
|
|
@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
|
|||
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
|
||||
bool shared);
|
||||
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
|
||||
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring,
|
||||
struct amdgpu_bo *bo,
|
||||
struct reservation_object *resv,
|
||||
struct dma_fence **fence, bool direct);
|
||||
int amdgpu_bo_validate(struct amdgpu_bo *bo);
|
||||
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
|
||||
struct dma_fence **fence);
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#include <linux/hwmon.h>
|
||||
#include <linux/hwmon-sysfs.h>
|
||||
#include <linux/nospec.h>
|
||||
#include "hwmgr.h"
|
||||
#define WIDTH_4K 3840
|
||||
|
||||
static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev);
|
||||
|
||||
|
@ -1642,6 +1644,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
|
|||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* Skip fan attributes on APU */
|
||||
if ((adev->flags & AMD_IS_APU) &&
|
||||
(attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
|
||||
attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
|
||||
return 0;
|
||||
|
||||
/* Skip limit attributes if DPM is not enabled */
|
||||
if (!adev->pm.dpm_enabled &&
|
||||
(attr == &sensor_dev_attr_temp1_crit.dev_attr.attr ||
|
||||
|
@ -1956,6 +1971,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
|
|||
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
|
||||
mutex_unlock(&adev->pm.mutex);
|
||||
}
|
||||
/* enable/disable Low Memory PState for UVD (4k videos) */
|
||||
if (adev->asic_type == CHIP_STONEY &&
|
||||
adev->uvd.decode_image_width >= WIDTH_4K) {
|
||||
struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
|
||||
|
||||
if (hwmgr && hwmgr->hwmgr_func &&
|
||||
hwmgr->hwmgr_func->update_nbdpm_pstate)
|
||||
hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr,
|
||||
!enable,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
|
||||
|
@ -2129,7 +2155,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
|
|||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
if (ring && ring->ready)
|
||||
if (ring && ring->sched.ready)
|
||||
amdgpu_fence_wait_empty(ring);
|
||||
}
|
||||
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
#include <drm/amdgpu_drm.h>
|
||||
#include <linux/dma-buf.h>
|
||||
|
||||
static const struct dma_buf_ops amdgpu_dmabuf_ops;
|
||||
|
||||
/**
|
||||
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
|
||||
* implementation
|
||||
|
@ -332,15 +330,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
const struct dma_buf_ops amdgpu_dmabuf_ops = {
|
||||
.attach = amdgpu_gem_map_attach,
|
||||
.detach = amdgpu_gem_map_detach,
|
||||
.map_dma_buf = drm_gem_map_dma_buf,
|
||||
.unmap_dma_buf = drm_gem_unmap_dma_buf,
|
||||
.release = drm_gem_dmabuf_release,
|
||||
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
|
||||
.map = drm_gem_dmabuf_kmap,
|
||||
.unmap = drm_gem_dmabuf_kunmap,
|
||||
.mmap = drm_gem_dmabuf_mmap,
|
||||
.vmap = drm_gem_dmabuf_vmap,
|
||||
.vunmap = drm_gem_dmabuf_vunmap,
|
||||
|
|
|
@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle)
|
|||
adev->psp.sos_fw = NULL;
|
||||
release_firmware(adev->psp.asd_fw);
|
||||
adev->psp.asd_fw = NULL;
|
||||
release_firmware(adev->psp.ta_fw);
|
||||
adev->psp.ta_fw = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -118,22 +120,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
|
|||
static int
|
||||
psp_cmd_submit_buf(struct psp_context *psp,
|
||||
struct amdgpu_firmware_info *ucode,
|
||||
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr,
|
||||
int index)
|
||||
struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr)
|
||||
{
|
||||
int ret;
|
||||
int index;
|
||||
|
||||
memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
|
||||
|
||||
memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));
|
||||
|
||||
index = atomic_inc_return(&psp->fence_value);
|
||||
ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr,
|
||||
fence_mc_addr, index);
|
||||
|
||||
while (*((unsigned int *)psp->fence_buf) != index) {
|
||||
msleep(1);
|
||||
if (ret) {
|
||||
atomic_dec(&psp->fence_value);
|
||||
return ret;
|
||||
}
|
||||
|
||||
while (*((unsigned int *)psp->fence_buf) != index)
|
||||
msleep(1);
|
||||
|
||||
/* the status field must be 0 after FW is loaded */
|
||||
if (ucode && psp->cmd_buf_mem->resp.status) {
|
||||
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
|
||||
|
@ -149,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
bool psp_support_vmr_ring(struct psp_context *psp)
|
||||
{
|
||||
if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
|
||||
struct psp_gfx_cmd_resp *cmd,
|
||||
uint64_t tmr_mc, uint32_t size)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
|
||||
if (psp_support_vmr_ring(psp))
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
|
||||
else
|
||||
cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
|
||||
cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
|
||||
cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
|
||||
cmd->cmd.cmd_setup_tmr.buf_size = size;
|
||||
|
@ -186,12 +204,12 @@ static int psp_tmr_load(struct psp_context *psp)
|
|||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
|
||||
psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
|
||||
DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
|
||||
PSP_TMR_SIZE, psp->tmr_mc_addr);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr, 1);
|
||||
psp->fence_buf_mc_addr);
|
||||
if (ret)
|
||||
goto failed;
|
||||
|
||||
|
@ -258,13 +276,194 @@ static int psp_asd_load(struct psp_context *psp)
|
|||
psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr, 2);
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
|
||||
uint32_t xgmi_ta_size, uint32_t shared_size)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_LOAD_TA;
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc);
|
||||
cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc);
|
||||
cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size;
|
||||
|
||||
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared);
|
||||
cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared);
|
||||
cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size;
|
||||
}
|
||||
|
||||
static int psp_xgmi_init_shared_buf(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Allocate 16k memory aligned to 4k from Frame Buffer (local
|
||||
* physical) for xgmi ta <-> Driver
|
||||
*/
|
||||
ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&psp->xgmi_context.xgmi_shared_bo,
|
||||
&psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
&psp->xgmi_context.xgmi_shared_buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_xgmi_load(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the loading in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
|
||||
memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size);
|
||||
|
||||
psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr,
|
||||
psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
if (!ret) {
|
||||
psp->xgmi_context.initialized = 1;
|
||||
psp->xgmi_context.session_id = cmd->resp.session_id;
|
||||
}
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint32_t xgmi_session_id)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA;
|
||||
cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id;
|
||||
}
|
||||
|
||||
static int psp_xgmi_unload(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the unloading in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
|
||||
uint32_t ta_cmd_id,
|
||||
uint32_t xgmi_session_id)
|
||||
{
|
||||
cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
|
||||
cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id;
|
||||
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
|
||||
/* Note: cmd_invoke_cmd.buf is not used for now */
|
||||
}
|
||||
|
||||
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
|
||||
{
|
||||
int ret;
|
||||
struct psp_gfx_cmd_resp *cmd;
|
||||
|
||||
/*
|
||||
* TODO: bypass the loading in sriov for now
|
||||
*/
|
||||
if (amdgpu_sriov_vf(psp->adev))
|
||||
return 0;
|
||||
|
||||
cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!cmd)
|
||||
return -ENOMEM;
|
||||
|
||||
psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id,
|
||||
psp->xgmi_context.session_id);
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, NULL, cmd,
|
||||
psp->fence_buf_mc_addr);
|
||||
|
||||
kfree(cmd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_xgmi_terminate(struct psp_context *psp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!psp->xgmi_context.initialized)
|
||||
return 0;
|
||||
|
||||
ret = psp_xgmi_unload(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
psp->xgmi_context.initialized = 0;
|
||||
|
||||
/* free xgmi shared memory */
|
||||
amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo,
|
||||
&psp->xgmi_context.xgmi_shared_mc_addr,
|
||||
&psp->xgmi_context.xgmi_shared_buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int psp_xgmi_initialize(struct psp_context *psp)
|
||||
{
|
||||
struct ta_xgmi_shared_memory *xgmi_cmd;
|
||||
int ret;
|
||||
|
||||
if (!psp->xgmi_context.initialized) {
|
||||
ret = psp_xgmi_init_shared_buf(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Load XGMI TA */
|
||||
ret = psp_xgmi_load(psp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Initialize XGMI session */
|
||||
xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf);
|
||||
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));
|
||||
xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE;
|
||||
|
||||
ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int psp_hw_start(struct psp_context *psp)
|
||||
{
|
||||
struct amdgpu_device *adev = psp->adev;
|
||||
|
@ -292,6 +491,15 @@ static int psp_hw_start(struct psp_context *psp)
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1) {
|
||||
ret = psp_xgmi_initialize(psp);
|
||||
/* Warning the XGMI seesion initialize failure
|
||||
* Instead of stop driver initialization
|
||||
*/
|
||||
if (ret)
|
||||
dev_err(psp->adev->dev,
|
||||
"XGMI: Failed to initialize XGMI session\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -321,7 +529,7 @@ static int psp_np_fw_load(struct psp_context *psp)
|
|||
return ret;
|
||||
|
||||
ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
|
||||
psp->fence_buf_mc_addr, i + 3);
|
||||
psp->fence_buf_mc_addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
@ -340,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
|
|||
int ret;
|
||||
struct psp_context *psp = &adev->psp;
|
||||
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0)
|
||||
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
goto skip_memalloc;
|
||||
}
|
||||
|
||||
psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
|
||||
if (!psp->cmd)
|
||||
|
@ -452,6 +662,10 @@ static int psp_hw_fini(void *handle)
|
|||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1)
|
||||
psp_xgmi_terminate(psp);
|
||||
|
||||
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
|
||||
|
||||
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
|
||||
|
@ -479,6 +693,15 @@ static int psp_suspend(void *handle)
|
|||
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
|
||||
return 0;
|
||||
|
||||
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
|
||||
psp->xgmi_context.initialized == 1) {
|
||||
ret = psp_xgmi_terminate(psp);
|
||||
if (ret) {
|
||||
DRM_ERROR("Failed to terminate xgmi ta\n");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
|
||||
if (ret) {
|
||||
DRM_ERROR("PSP ring stop failed\n");
|
||||
|
|
|
@ -27,14 +27,17 @@
|
|||
|
||||
#include "amdgpu.h"
|
||||
#include "psp_gfx_if.h"
|
||||
#include "ta_xgmi_if.h"
|
||||
|
||||
#define PSP_FENCE_BUFFER_SIZE 0x1000
|
||||
#define PSP_CMD_BUFFER_SIZE 0x1000
|
||||
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_XGMI_SHARED_MEM_SIZE 0x4000
|
||||
#define PSP_1_MEG 0x100000
|
||||
#define PSP_TMR_SIZE 0x400000
|
||||
|
||||
struct psp_context;
|
||||
struct psp_xgmi_node_info;
|
||||
struct psp_xgmi_topology_info;
|
||||
|
||||
enum psp_ring_type
|
||||
|
@ -80,12 +83,20 @@ struct psp_funcs
|
|||
enum AMDGPU_UCODE_ID ucode_type);
|
||||
bool (*smu_reload_quirk)(struct psp_context *psp);
|
||||
int (*mode1_reset)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
|
||||
uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
|
||||
int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
struct psp_xgmi_topology_info *topology);
|
||||
};
|
||||
|
||||
struct psp_xgmi_context {
|
||||
uint8_t initialized;
|
||||
uint32_t session_id;
|
||||
struct amdgpu_bo *xgmi_shared_bo;
|
||||
uint64_t xgmi_shared_mc_addr;
|
||||
void *xgmi_shared_buf;
|
||||
};
|
||||
|
||||
struct psp_context
|
||||
|
@ -96,7 +107,7 @@ struct psp_context
|
|||
|
||||
const struct psp_funcs *funcs;
|
||||
|
||||
/* fence buffer */
|
||||
/* firmware buffer */
|
||||
struct amdgpu_bo *fw_pri_bo;
|
||||
uint64_t fw_pri_mc_addr;
|
||||
void *fw_pri_buf;
|
||||
|
@ -134,6 +145,16 @@ struct psp_context
|
|||
struct amdgpu_bo *cmd_buf_bo;
|
||||
uint64_t cmd_buf_mc_addr;
|
||||
struct psp_gfx_cmd_resp *cmd_buf_mem;
|
||||
|
||||
/* fence value associated with cmd buffer */
|
||||
atomic_t fence_value;
|
||||
|
||||
/* xgmi ta firmware and buffer */
|
||||
const struct firmware *ta_fw;
|
||||
uint32_t ta_xgmi_ucode_version;
|
||||
uint32_t ta_xgmi_ucode_size;
|
||||
uint8_t *ta_xgmi_start_addr;
|
||||
struct psp_xgmi_context xgmi_context;
|
||||
};
|
||||
|
||||
struct amdgpu_psp_funcs {
|
||||
|
@ -141,21 +162,17 @@ struct amdgpu_psp_funcs {
|
|||
enum AMDGPU_UCODE_ID);
|
||||
};
|
||||
|
||||
#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
|
||||
struct psp_xgmi_node_info {
|
||||
uint64_t node_id;
|
||||
uint8_t num_hops;
|
||||
uint8_t is_sharing_enabled;
|
||||
enum ta_xgmi_assigned_sdma_engine sdma_engine;
|
||||
};
|
||||
|
||||
struct psp_xgmi_topology_info {
|
||||
/* Generated by PSP to identify the GPU instance within xgmi connection */
|
||||
uint64_t device_id;
|
||||
/*
|
||||
* If all bits set to 0 , driver indicates it wants to retrieve the xgmi
|
||||
* connection vector topology, but not access enable the connections
|
||||
* if some or all bits are set to 1, driver indicates it want to retrieve the
|
||||
* current xgmi topology and access enable the link to GPU[i] associated
|
||||
* with the bit position in the vector.
|
||||
* On return,: bits indicated which xgmi links are present/active depending
|
||||
* on the value passed in. The relative bit offset for the relative GPU index
|
||||
* within the hive is always marked active.
|
||||
*/
|
||||
uint32_t connection_mask;
|
||||
uint32_t reserved; /* must be 0 */
|
||||
uint32_t num_nodes;
|
||||
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
|
||||
};
|
||||
|
||||
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
|
||||
|
@ -177,8 +194,8 @@ struct psp_xgmi_topology_info {
|
|||
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
|
||||
#define psp_mode1_reset(psp) \
|
||||
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
|
||||
#define psp_xgmi_get_device_id(psp) \
|
||||
((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
|
||||
#define psp_xgmi_get_node_id(psp) \
|
||||
((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
|
||||
#define psp_xgmi_get_hive_id(psp) \
|
||||
((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
|
||||
#define psp_xgmi_get_topology_info(psp, num_device, topology) \
|
||||
|
@ -199,6 +216,9 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
|
|||
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
|
||||
|
||||
int psp_gpu_reset(struct amdgpu_device *adev);
|
||||
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
|
||||
bool psp_support_vmr_ring(struct psp_context *psp);
|
||||
|
||||
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
|||
*/
|
||||
void amdgpu_ring_fini(struct amdgpu_ring *ring)
|
||||
{
|
||||
ring->ready = false;
|
||||
ring->sched.ready = false;
|
||||
|
||||
/* Not to finish a ring which is not initialized */
|
||||
if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
|
||||
|
@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
|
|||
{
|
||||
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
|
||||
|
||||
if (!ring->funcs->soft_recovery)
|
||||
if (!ring->funcs->soft_recovery || !fence)
|
||||
return false;
|
||||
|
||||
atomic_inc(&ring->adev->gpu_reset_counter);
|
||||
|
@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
|
|||
debugfs_remove(ring->ent);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ring_test_helper - tests ring and set sched readiness status
|
||||
*
|
||||
* @ring: ring to try the recovery on
|
||||
*
|
||||
* Tests ring and set sched readiness status
|
||||
*
|
||||
* Returns 0 on success, error on failure.
|
||||
*/
|
||||
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
int r;
|
||||
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r)
|
||||
DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
|
||||
ring->name, r);
|
||||
else
|
||||
DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
|
||||
ring->name);
|
||||
|
||||
ring->sched.ready = !r;
|
||||
return r;
|
||||
}
|
||||
|
|
|
@ -129,8 +129,9 @@ struct amdgpu_ring_funcs {
|
|||
unsigned emit_ib_size;
|
||||
/* command emit functions */
|
||||
void (*emit_ib)(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch);
|
||||
bool ctx_switch);
|
||||
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
|
||||
uint64_t seq, unsigned flags);
|
||||
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
|
||||
|
@ -189,7 +190,6 @@ struct amdgpu_ring {
|
|||
uint64_t gpu_addr;
|
||||
uint64_t ptr_mask;
|
||||
uint32_t buf_mask;
|
||||
bool ready;
|
||||
u32 idx;
|
||||
u32 me;
|
||||
u32 pipe;
|
||||
|
@ -229,7 +229,7 @@ struct amdgpu_ring {
|
|||
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
|
||||
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
|
||||
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
|
||||
#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
|
||||
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
|
||||
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
|
||||
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
|
||||
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
|
||||
|
@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
|
|||
ring->count_dw -= count_dw;
|
||||
}
|
||||
|
||||
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,282 @@
|
|||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
* Copyright 2008 Red Hat Inc.
|
||||
* Copyright 2009 Jerome Glisse.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <linux/firmware.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_gfx.h"
|
||||
#include "amdgpu_rlc.h"
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set RLC enter into safe mode if RLC is enabled and haven't in safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->gfx.rlc.in_safe_mode)
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->set_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Set RLC exit safe mode if RLC is enabled and have entered into safe mode.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev)
|
||||
{
|
||||
if (!(adev->gfx.rlc.in_safe_mode))
|
||||
return;
|
||||
|
||||
/* if RLC is not enabled, do nothing */
|
||||
if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev))
|
||||
return;
|
||||
|
||||
if (adev->cg_flags &
|
||||
(AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
|
||||
AMD_CG_SUPPORT_GFX_3D_CGCG)) {
|
||||
adev->gfx.rlc.funcs->unset_safe_mode(adev);
|
||||
adev->gfx.rlc.in_safe_mode = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_sr - Init save restore block
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @dws: the size of save restore block
|
||||
*
|
||||
* Allocate and setup value to save restore block of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws)
|
||||
{
|
||||
const u32 *src_ptr;
|
||||
volatile u32 *dst_ptr;
|
||||
u32 i;
|
||||
int r;
|
||||
|
||||
/* allocate save restore block */
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* write the sr buffer */
|
||||
src_ptr = adev->gfx.rlc.reg_list;
|
||||
dst_ptr = adev->gfx.rlc.sr_ptr;
|
||||
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
|
||||
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_csb - Init clear state block
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and setup value to clear state block of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev)
|
||||
{
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws;
|
||||
int r;
|
||||
|
||||
/* allocate clear state block */
|
||||
adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev);
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cs buffer */
|
||||
dst_ptr = adev->gfx.rlc.cs_ptr;
|
||||
adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_init_cpt - Init cp table
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Allocate and setup value to cp table of rlc.
|
||||
* Returns 0 on succeess or negative error code if allocate failed.
|
||||
*/
|
||||
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
|
||||
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to create cp table bo\n", r);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* set up the cp table */
|
||||
amdgpu_gfx_rlc_setup_cp_table(adev);
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_setup_cp_table - setup cp the buffer of cp table
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Write cp firmware data into cp table.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev)
|
||||
{
|
||||
const __le32 *fw_data;
|
||||
volatile u32 *dst_ptr;
|
||||
int me, i, max_me;
|
||||
u32 bo_offset = 0;
|
||||
u32 table_offset, table_size;
|
||||
|
||||
max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev);
|
||||
|
||||
/* write the cp table buffer */
|
||||
dst_ptr = adev->gfx.rlc.cp_table_ptr;
|
||||
for (me = 0; me < max_me; me++) {
|
||||
if (me == 0) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.ce_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 1) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.pfp_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 2) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.me_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 3) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
} else if (me == 4) {
|
||||
const struct gfx_firmware_header_v1_0 *hdr =
|
||||
(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
|
||||
fw_data = (const __le32 *)
|
||||
(adev->gfx.mec2_fw->data +
|
||||
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
|
||||
table_offset = le32_to_cpu(hdr->jt_offset);
|
||||
table_size = le32_to_cpu(hdr->jt_size);
|
||||
}
|
||||
|
||||
for (i = 0; i < table_size; i ++) {
|
||||
dst_ptr[bo_offset + i] =
|
||||
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
|
||||
}
|
||||
|
||||
bo_offset += table_size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_gfx_rlc_fini - Free BO which used for RLC
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Free three BO which is used for rlc_save_restore_block, rlc_clear_state_block
|
||||
* and rlc_jump_table_block.
|
||||
*/
|
||||
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
/* save restore block */
|
||||
if (adev->gfx.rlc.save_restore_obj) {
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
}
|
||||
|
||||
/* clear state block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
|
||||
&adev->gfx.rlc.clear_state_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
|
||||
/* jump table block */
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
|
||||
&adev->gfx.rlc.cp_table_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.cp_table_ptr);
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright 2014 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __AMDGPU_RLC_H__
|
||||
#define __AMDGPU_RLC_H__
|
||||
|
||||
#include "clearstate_defs.h"
|
||||
|
||||
struct amdgpu_rlc_funcs {
|
||||
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
|
||||
void (*set_safe_mode)(struct amdgpu_device *adev);
|
||||
void (*unset_safe_mode)(struct amdgpu_device *adev);
|
||||
int (*init)(struct amdgpu_device *adev);
|
||||
u32 (*get_csb_size)(struct amdgpu_device *adev);
|
||||
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
|
||||
int (*get_cp_table_num)(struct amdgpu_device *adev);
|
||||
int (*resume)(struct amdgpu_device *adev);
|
||||
void (*stop)(struct amdgpu_device *adev);
|
||||
void (*reset)(struct amdgpu_device *adev);
|
||||
void (*start)(struct amdgpu_device *adev);
|
||||
};
|
||||
|
||||
struct amdgpu_rlc {
|
||||
/* for power gating */
|
||||
struct amdgpu_bo *save_restore_obj;
|
||||
uint64_t save_restore_gpu_addr;
|
||||
volatile uint32_t *sr_ptr;
|
||||
const u32 *reg_list;
|
||||
u32 reg_list_size;
|
||||
/* for clear state */
|
||||
struct amdgpu_bo *clear_state_obj;
|
||||
uint64_t clear_state_gpu_addr;
|
||||
volatile uint32_t *cs_ptr;
|
||||
const struct cs_section_def *cs_data;
|
||||
u32 clear_state_size;
|
||||
/* for cp tables */
|
||||
struct amdgpu_bo *cp_table_obj;
|
||||
uint64_t cp_table_gpu_addr;
|
||||
volatile uint32_t *cp_table_ptr;
|
||||
u32 cp_table_size;
|
||||
|
||||
/* safe mode for updating CG/PG state */
|
||||
bool in_safe_mode;
|
||||
const struct amdgpu_rlc_funcs *funcs;
|
||||
|
||||
/* for firmware data */
|
||||
u32 save_and_restore_offset;
|
||||
u32 clear_state_descriptor_offset;
|
||||
u32 avail_scratch_ram_locations;
|
||||
u32 reg_restore_list_size;
|
||||
u32 reg_list_format_start;
|
||||
u32 reg_list_format_separate_start;
|
||||
u32 starting_offsets_start;
|
||||
u32 reg_list_format_size_bytes;
|
||||
u32 reg_list_size_bytes;
|
||||
u32 reg_list_format_direct_reg_list_length;
|
||||
u32 save_restore_list_cntl_size_bytes;
|
||||
u32 save_restore_list_gpm_size_bytes;
|
||||
u32 save_restore_list_srm_size_bytes;
|
||||
|
||||
u32 *register_list_format;
|
||||
u32 *register_restore;
|
||||
u8 *save_restore_list_cntl;
|
||||
u8 *save_restore_list_gpm;
|
||||
u8 *save_restore_list_srm;
|
||||
|
||||
bool is_rlc_v2_1;
|
||||
};
|
||||
|
||||
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws);
|
||||
int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev);
|
||||
int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev);
|
||||
void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
|
@ -28,17 +28,31 @@
|
|||
* GPU SDMA IP block helpers function.
|
||||
*/
|
||||
|
||||
struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
|
||||
struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++)
|
||||
if (&adev->sdma.instance[i].ring == ring)
|
||||
break;
|
||||
if (ring == &adev->sdma.instance[i].ring ||
|
||||
ring == &adev->sdma.instance[i].page)
|
||||
return &adev->sdma.instance[i];
|
||||
|
||||
if (i < AMDGPU_MAX_SDMA_INSTANCES)
|
||||
return &adev->sdma.instance[i];
|
||||
else
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
|
||||
{
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
if (ring == &adev->sdma.instance[i].ring ||
|
||||
ring == &adev->sdma.instance[i].page) {
|
||||
*index = i;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
|
|||
uint32_t feature_version;
|
||||
|
||||
struct amdgpu_ring ring;
|
||||
struct amdgpu_ring page;
|
||||
bool burst_nop;
|
||||
};
|
||||
|
||||
|
@ -50,6 +51,7 @@ struct amdgpu_sdma {
|
|||
struct amdgpu_irq_src illegal_inst_irq;
|
||||
int num_instances;
|
||||
uint32_t srbm_soft_reset;
|
||||
bool has_page_queue;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs {
|
|||
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
|
||||
|
||||
struct amdgpu_sdma_instance *
|
||||
amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
|
||||
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
|
||||
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
|
|||
TP_ARGS(vm, ring, job),
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, pasid)
|
||||
__string(ring, ring->name)
|
||||
__field(u32, ring)
|
||||
__field(u32, vmid)
|
||||
__field(u32, vm_hub)
|
||||
|
@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id,
|
|||
|
||||
TP_fast_assign(
|
||||
__entry->pasid = vm->pasid;
|
||||
__entry->ring = ring->idx;
|
||||
__assign_str(ring, ring->name)
|
||||
__entry->vmid = job->vmid;
|
||||
__entry->vm_hub = ring->funcs->vmhub,
|
||||
__entry->pd_addr = job->vm_pd_addr;
|
||||
__entry->needs_flush = job->vm_needs_flush;
|
||||
),
|
||||
TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
|
||||
__entry->pasid, __entry->ring, __entry->vmid,
|
||||
TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
|
||||
__entry->pasid, __get_str(ring), __entry->vmid,
|
||||
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
|
||||
);
|
||||
|
||||
|
@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush,
|
|||
uint64_t pd_addr),
|
||||
TP_ARGS(ring, vmid, pd_addr),
|
||||
TP_STRUCT__entry(
|
||||
__field(u32, ring)
|
||||
__string(ring, ring->name)
|
||||
__field(u32, vmid)
|
||||
__field(u32, vm_hub)
|
||||
__field(u64, pd_addr)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->ring = ring->idx;
|
||||
__assign_str(ring, ring->name)
|
||||
__entry->vmid = vmid;
|
||||
__entry->vm_hub = ring->funcs->vmhub;
|
||||
__entry->pd_addr = pd_addr;
|
||||
),
|
||||
TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx",
|
||||
__entry->ring, __entry->vmid,
|
||||
TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx",
|
||||
__get_str(ring), __entry->vmid,
|
||||
__entry->vm_hub,__entry->pd_addr)
|
||||
);
|
||||
|
||||
|
|
|
@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
|
|||
static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
|
||||
static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
|
||||
|
||||
/*
|
||||
* Global memory.
|
||||
*/
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_mem_global_init - Initialize and acquire reference to
|
||||
* memory object
|
||||
*
|
||||
* @ref: Object for initialization.
|
||||
*
|
||||
* This is called by drm_global_item_ref() when an object is being
|
||||
* initialized.
|
||||
*/
|
||||
static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
|
||||
{
|
||||
return ttm_mem_global_init(ref->object);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_mem_global_release - Drop reference to a memory object
|
||||
*
|
||||
* @ref: Object being removed
|
||||
*
|
||||
* This is called by drm_global_item_unref() when an object is being
|
||||
* released.
|
||||
*/
|
||||
static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
|
||||
{
|
||||
ttm_mem_global_release(ref->object);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
|
||||
*
|
||||
* @adev: AMDGPU device for which the global structures need to be registered.
|
||||
*
|
||||
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
|
||||
* during bring up.
|
||||
*/
|
||||
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct drm_global_reference *global_ref;
|
||||
int r;
|
||||
|
||||
/* ensure reference is false in case init fails */
|
||||
adev->mman.mem_global_referenced = false;
|
||||
|
||||
global_ref = &adev->mman.mem_global_ref;
|
||||
global_ref->global_type = DRM_GLOBAL_TTM_MEM;
|
||||
global_ref->size = sizeof(struct ttm_mem_global);
|
||||
global_ref->init = &amdgpu_ttm_mem_global_init;
|
||||
global_ref->release = &amdgpu_ttm_mem_global_release;
|
||||
r = drm_global_item_ref(global_ref);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed setting up TTM memory accounting "
|
||||
"subsystem.\n");
|
||||
goto error_mem;
|
||||
}
|
||||
|
||||
adev->mman.bo_global_ref.mem_glob =
|
||||
adev->mman.mem_global_ref.object;
|
||||
global_ref = &adev->mman.bo_global_ref.ref;
|
||||
global_ref->global_type = DRM_GLOBAL_TTM_BO;
|
||||
global_ref->size = sizeof(struct ttm_bo_global);
|
||||
global_ref->init = &ttm_bo_global_init;
|
||||
global_ref->release = &ttm_bo_global_release;
|
||||
r = drm_global_item_ref(global_ref);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed setting up TTM BO subsystem.\n");
|
||||
goto error_bo;
|
||||
}
|
||||
|
||||
mutex_init(&adev->mman.gtt_window_lock);
|
||||
|
||||
adev->mman.mem_global_referenced = true;
|
||||
|
||||
return 0;
|
||||
|
||||
error_bo:
|
||||
drm_global_item_unref(&adev->mman.mem_global_ref);
|
||||
error_mem:
|
||||
return r;
|
||||
}
|
||||
|
||||
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
if (adev->mman.mem_global_referenced) {
|
||||
mutex_destroy(&adev->mman.gtt_window_lock);
|
||||
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
|
||||
drm_global_item_unref(&adev->mman.mem_global_ref);
|
||||
adev->mman.mem_global_referenced = false;
|
||||
}
|
||||
}
|
||||
|
||||
static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
|
||||
{
|
||||
return 0;
|
||||
|
@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
|
|||
int r;
|
||||
u64 vis_vram_limit;
|
||||
|
||||
/* initialize global references for vram/gtt */
|
||||
r = amdgpu_ttm_global_init(adev);
|
||||
if (r) {
|
||||
return r;
|
||||
}
|
||||
mutex_init(&adev->mman.gtt_window_lock);
|
||||
|
||||
/* No others user of address space so set it to 0 */
|
||||
r = ttm_bo_device_init(&adev->mman.bdev,
|
||||
adev->mman.bo_global_ref.ref.object,
|
||||
&amdgpu_bo_driver,
|
||||
adev->ddev->anon_inode->i_mapping,
|
||||
DRM_FILE_PAGE_OFFSET,
|
||||
|
@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
|
|||
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
|
||||
ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
|
||||
ttm_bo_device_release(&adev->mman.bdev);
|
||||
amdgpu_ttm_global_fini(adev);
|
||||
adev->mman.initialized = false;
|
||||
DRM_INFO("amdgpu: ttm finalized\n");
|
||||
}
|
||||
|
@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
|
|||
unsigned i;
|
||||
int r;
|
||||
|
||||
if (direct_submit && !ring->ready) {
|
||||
if (direct_submit && !ring->sched.ready) {
|
||||
DRM_ERROR("Trying to move memory with ring turned off.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
|
||||
|
||||
struct amdgpu_mman {
|
||||
struct ttm_bo_global_ref bo_global_ref;
|
||||
struct drm_global_reference mem_global_ref;
|
||||
struct ttm_bo_device bdev;
|
||||
bool mem_global_referenced;
|
||||
bool initialized;
|
||||
|
|
|
@ -57,6 +57,17 @@ struct psp_firmware_header_v1_0 {
|
|||
uint32_t sos_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=0 */
|
||||
struct ta_firmware_header_v1_0 {
|
||||
struct common_firmware_header header;
|
||||
uint32_t ta_xgmi_ucode_version;
|
||||
uint32_t ta_xgmi_offset_bytes;
|
||||
uint32_t ta_xgmi_size_bytes;
|
||||
uint32_t ta_ras_ucode_version;
|
||||
uint32_t ta_ras_offset_bytes;
|
||||
uint32_t ta_ras_size_bytes;
|
||||
};
|
||||
|
||||
/* version_major=1, version_minor=0 */
|
||||
struct gfx_firmware_header_v1_0 {
|
||||
struct common_firmware_header header;
|
||||
|
@ -170,6 +181,7 @@ union amdgpu_firmware_header {
|
|||
struct mc_firmware_header_v1_0 mc;
|
||||
struct smc_firmware_header_v1_0 smc;
|
||||
struct psp_firmware_header_v1_0 psp;
|
||||
struct ta_firmware_header_v1_0 ta;
|
||||
struct gfx_firmware_header_v1_0 gfx;
|
||||
struct rlc_firmware_header_v1_0 rlc;
|
||||
struct rlc_firmware_header_v2_0 rlc_v2_0;
|
||||
|
|
|
@ -692,6 +692,8 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
|
|||
buf_sizes[0x1] = dpb_size;
|
||||
buf_sizes[0x2] = image_size;
|
||||
buf_sizes[0x4] = min_ctx_size;
|
||||
/* store image width to adjust nb memory pstate */
|
||||
adev->uvd.decode_image_width = width;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1243,30 +1245,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
{
|
||||
struct dma_fence *fence;
|
||||
long r;
|
||||
uint32_t ip_instance = ring->me;
|
||||
|
||||
r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
|
|
|
@ -65,6 +65,8 @@ struct amdgpu_uvd {
|
|||
struct drm_sched_entity entity;
|
||||
struct delayed_work idle_work;
|
||||
unsigned harvest_config;
|
||||
/* store image width to adjust nb memory state */
|
||||
unsigned decode_image_width;
|
||||
};
|
||||
|
||||
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
|
||||
|
|
|
@ -1032,8 +1032,10 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
|
|||
* @ib: the IB to execute
|
||||
*
|
||||
*/
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
bool ctx_switch)
|
||||
{
|
||||
amdgpu_ring_write(ring, VCE_CMD_IB);
|
||||
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
|
||||
|
@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
|
|||
return 0;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, VCE_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
|
@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed\n",
|
||||
ring->idx);
|
||||
if (i >= timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
return 0;
|
||||
|
||||
r = amdgpu_vce_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
|
|
|
@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
|
|||
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
|
||||
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch);
|
||||
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib, bool ctx_switch);
|
||||
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
|
||||
unsigned flags);
|
||||
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
|
||||
|
|
|
@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
|
||||
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
|
@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
long r;
|
||||
|
||||
r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
error:
|
||||
return r;
|
||||
}
|
||||
|
@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
|
|||
int r;
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 16);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring, VCN_ENC_CMD_END);
|
||||
amdgpu_ring_commit(ring);
|
||||
|
||||
|
@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed\n",
|
||||
ring->idx);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
long r;
|
||||
|
||||
r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
if (r == 0)
|
||||
r = -ETIMEDOUT;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
} else {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else if (r > 0)
|
||||
r = 0;
|
||||
}
|
||||
|
||||
error:
|
||||
dma_fence_put(fence);
|
||||
return r;
|
||||
|
@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
|||
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
|
||||
ring->idx, r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
amdgpu_ring_write(ring,
|
||||
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
|
||||
|
@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
|
||||
ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -856,21 +814,18 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
long r = 0;
|
||||
|
||||
r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
|
||||
if (r)
|
||||
goto error;
|
||||
}
|
||||
|
||||
r = dma_fence_wait_timeout(fence, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out.\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto error;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto error;
|
||||
} else
|
||||
} else {
|
||||
r = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
|
||||
|
@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout)
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
else {
|
||||
DRM_ERROR("ib test failed (0x%08X)\n", tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
dma_fence_put(fence);
|
||||
|
||||
error:
|
||||
return r;
|
||||
}
|
||||
|
|
|
@ -23,16 +23,6 @@
|
|||
|
||||
#include "amdgpu.h"
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
|
||||
{
|
||||
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
|
||||
|
||||
addr -= AMDGPU_VA_RESERVED_SIZE;
|
||||
addr = amdgpu_gmc_sign_extend(addr);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
|
||||
{
|
||||
/* By now all MMIO pages except mailbox are blocked */
|
||||
|
@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
|
|||
return RREG32_NO_KIQ(0xc040) == 0xffffffff;
|
||||
}
|
||||
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
|
||||
{
|
||||
int r;
|
||||
void *ptr;
|
||||
|
||||
r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj,
|
||||
&adev->virt.csa_vmid0_addr, &ptr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(ptr, 0, AMDGPU_CSA_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_free_static_csa(struct amdgpu_device *adev) {
|
||||
amdgpu_bo_free_kernel(&adev->virt.csa_obj,
|
||||
&adev->virt.csa_vmid0_addr,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_map_static_csa should be called during amdgpu_vm_init
|
||||
* it maps virtual address amdgpu_csa_vaddr() to this VM, and each command
|
||||
* submission of GFX should use this virtual address within META_DATA init
|
||||
* package to support SRIOV gfx preemption.
|
||||
*/
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va)
|
||||
{
|
||||
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
|
||||
struct ww_acquire_ctx ticket;
|
||||
struct list_head list;
|
||||
struct amdgpu_bo_list_entry pd;
|
||||
struct ttm_validate_buffer csa_tv;
|
||||
int r;
|
||||
|
||||
INIT_LIST_HEAD(&list);
|
||||
INIT_LIST_HEAD(&csa_tv.head);
|
||||
csa_tv.bo = &adev->virt.csa_obj->tbo;
|
||||
csa_tv.shared = true;
|
||||
|
||||
list_add(&csa_tv.head, &list);
|
||||
amdgpu_vm_get_pd_bo(vm, &list, &pd);
|
||||
|
||||
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
*bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj);
|
||||
if (!*bo_va) {
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
DRM_ERROR("failed to create bo_va for static CSA\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
|
||||
AMDGPU_CSA_SIZE);
|
||||
if (r) {
|
||||
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
|
||||
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
|
||||
AMDGPU_PTE_EXECUTABLE);
|
||||
|
||||
if (r) {
|
||||
DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
|
||||
amdgpu_vm_bo_rmv(adev, *bo_va);
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return r;
|
||||
}
|
||||
|
||||
ttm_eu_backoff_reservation(&ticket, &list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void amdgpu_virt_init_setting(struct amdgpu_device *adev)
|
||||
{
|
||||
/* enable virtual display */
|
||||
|
@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
|
|||
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
|
||||
goto failed_kiq_read;
|
||||
|
||||
if (in_interrupt())
|
||||
might_sleep();
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
|
@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
|
|||
if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
|
||||
goto failed_kiq_write;
|
||||
|
||||
if (in_interrupt())
|
||||
might_sleep();
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
|
@ -228,6 +132,46 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
|
|||
pr_err("failed to write reg:%x\n", reg);
|
||||
}
|
||||
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t reg1,
|
||||
uint32_t ref, uint32_t mask)
|
||||
{
|
||||
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
|
||||
struct amdgpu_ring *ring = &kiq->ring;
|
||||
signed long r, cnt = 0;
|
||||
unsigned long flags;
|
||||
uint32_t seq;
|
||||
|
||||
spin_lock_irqsave(&kiq->ring_lock, flags);
|
||||
amdgpu_ring_alloc(ring, 32);
|
||||
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
|
||||
ref, mask);
|
||||
amdgpu_fence_emit_polling(ring, &seq);
|
||||
amdgpu_ring_commit(ring);
|
||||
spin_unlock_irqrestore(&kiq->ring_lock, flags);
|
||||
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
|
||||
/* don't wait anymore for IRQ context */
|
||||
if (r < 1 && in_interrupt())
|
||||
goto failed_kiq;
|
||||
|
||||
might_sleep();
|
||||
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
|
||||
|
||||
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
|
||||
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
|
||||
}
|
||||
|
||||
if (cnt > MAX_KIQ_REG_TRY)
|
||||
goto failed_kiq;
|
||||
|
||||
return;
|
||||
|
||||
failed_kiq:
|
||||
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_virt_request_full_gpu() - request full gpu access
|
||||
* @amdgpu: amdgpu device.
|
||||
|
@ -390,7 +334,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
|
|||
|
||||
if (adev->fw_vram_usage.va != NULL) {
|
||||
adev->virt.fw_reserve.p_pf2vf =
|
||||
(struct amdgim_pf2vf_info_header *)(
|
||||
(struct amd_sriov_msg_pf2vf_info_header *)(
|
||||
adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET);
|
||||
AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size);
|
||||
AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum);
|
||||
|
|
|
@ -63,8 +63,8 @@ struct amdgpu_virt_ops {
|
|||
* Firmware Reserve Frame buffer
|
||||
*/
|
||||
struct amdgpu_virt_fw_reserve {
|
||||
struct amdgim_pf2vf_info_header *p_pf2vf;
|
||||
struct amdgim_vf2pf_info_header *p_vf2pf;
|
||||
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
|
||||
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
|
||||
unsigned int checksum_key;
|
||||
};
|
||||
/*
|
||||
|
@ -85,15 +85,17 @@ enum AMDGIM_FEATURE_FLAG {
|
|||
AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4,
|
||||
};
|
||||
|
||||
struct amdgim_pf2vf_info_header {
|
||||
struct amd_sriov_msg_pf2vf_info_header {
|
||||
/* the total structure size in byte. */
|
||||
uint32_t size;
|
||||
/* version of this structure, written by the GIM */
|
||||
uint32_t version;
|
||||
/* reserved */
|
||||
uint32_t reserved[2];
|
||||
} __aligned(4);
|
||||
struct amdgim_pf2vf_info_v1 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_pf2vf_info_header header;
|
||||
struct amd_sriov_msg_pf2vf_info_header header;
|
||||
/* max_width * max_height */
|
||||
unsigned int uvd_enc_max_pixels_count;
|
||||
/* 16x16 pixels/sec, codec independent */
|
||||
|
@ -112,7 +114,7 @@ struct amdgim_pf2vf_info_v1 {
|
|||
|
||||
struct amdgim_pf2vf_info_v2 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_pf2vf_info_header header;
|
||||
struct amd_sriov_msg_pf2vf_info_header header;
|
||||
/* use private key from mailbox 2 to create chueksum */
|
||||
uint32_t checksum;
|
||||
/* The features flags of the GIM driver supports. */
|
||||
|
@ -137,20 +139,22 @@ struct amdgim_pf2vf_info_v2 {
|
|||
uint64_t vcefw_kboffset;
|
||||
/* VCE FW size in KB */
|
||||
uint32_t vcefw_ksize;
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)];
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
|
||||
} __aligned(4);
|
||||
|
||||
|
||||
struct amdgim_vf2pf_info_header {
|
||||
struct amd_sriov_msg_vf2pf_info_header {
|
||||
/* the total structure size in byte. */
|
||||
uint32_t size;
|
||||
/*version of this structure, written by the guest */
|
||||
uint32_t version;
|
||||
/* reserved */
|
||||
uint32_t reserved[2];
|
||||
} __aligned(4);
|
||||
|
||||
struct amdgim_vf2pf_info_v1 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_vf2pf_info_header header;
|
||||
struct amd_sriov_msg_vf2pf_info_header header;
|
||||
/* driver version */
|
||||
char driver_version[64];
|
||||
/* driver certification, 1=WHQL, 0=None */
|
||||
|
@ -180,7 +184,7 @@ struct amdgim_vf2pf_info_v1 {
|
|||
|
||||
struct amdgim_vf2pf_info_v2 {
|
||||
/* header contains size and version */
|
||||
struct amdgim_vf2pf_info_header header;
|
||||
struct amd_sriov_msg_vf2pf_info_header header;
|
||||
uint32_t checksum;
|
||||
/* driver version */
|
||||
uint8_t driver_version[64];
|
||||
|
@ -206,7 +210,7 @@ struct amdgim_vf2pf_info_v2 {
|
|||
uint32_t uvd_enc_usage;
|
||||
/* guest uvd engine usage percentage. 0xffff means N/A. */
|
||||
uint32_t uvd_enc_health;
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)];
|
||||
uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)];
|
||||
} __aligned(4);
|
||||
|
||||
#define AMDGPU_FW_VRAM_VF2PF_VER 2
|
||||
|
@ -238,7 +242,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
|
|||
struct amdgpu_virt {
|
||||
uint32_t caps;
|
||||
struct amdgpu_bo *csa_obj;
|
||||
uint64_t csa_vmid0_addr;
|
||||
bool chained_ib_support;
|
||||
uint32_t reg_val_offs;
|
||||
struct amdgpu_irq_src ack_irq;
|
||||
|
@ -251,8 +254,6 @@ struct amdgpu_virt {
|
|||
uint32_t gim_feature;
|
||||
};
|
||||
|
||||
#define AMDGPU_CSA_SIZE (8 * 1024)
|
||||
|
||||
#define amdgpu_sriov_enabled(adev) \
|
||||
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
|
||||
|
||||
|
@ -277,17 +278,13 @@ static inline bool is_virtual_machine(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
struct amdgpu_vm;
|
||||
|
||||
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
|
||||
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
|
||||
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
|
||||
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
||||
struct amdgpu_bo_va **bo_va);
|
||||
void amdgpu_free_static_csa(struct amdgpu_device *adev);
|
||||
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
|
||||
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
|
||||
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
|
||||
void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
|
||||
uint32_t reg0, uint32_t rreg1,
|
||||
uint32_t ref, uint32_t mask);
|
||||
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
|
||||
|
|
|
@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
|||
{
|
||||
entry->priority = 0;
|
||||
entry->tv.bo = &vm->root.base.bo->tbo;
|
||||
entry->tv.shared = true;
|
||||
/* One for the VM updates, one for TTM and one for the CS job */
|
||||
entry->tv.num_shared = 3;
|
||||
entry->user_pages = NULL;
|
||||
list_add(&entry->tv.head, validated);
|
||||
}
|
||||
|
@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|||
|
||||
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
|
||||
|
||||
r = reservation_object_reserve_shared(bo->tbo.resv);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
|
||||
if (r)
|
||||
goto error;
|
||||
|
@ -1844,10 +1841,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
|||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
||||
r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
@ -3028,6 +3021,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
if (r)
|
||||
goto error_free_root;
|
||||
|
||||
r = reservation_object_reserve_shared(root->tbo.resv, 1);
|
||||
if (r)
|
||||
goto error_unreserve;
|
||||
|
||||
r = amdgpu_vm_clear_bo(adev, vm, root,
|
||||
adev->vm_manager.root_level,
|
||||
vm->pte_support_ats);
|
||||
|
@ -3057,7 +3054,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
}
|
||||
|
||||
INIT_KFIFO(vm->faults);
|
||||
vm->fault_credit = 16;
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -3269,42 +3265,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
|||
amdgpu_vmid_free_reserved(adev, vm, i);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
* @pasid: PASID do identify the VM
|
||||
*
|
||||
* This function is expected to be called in interrupt context.
|
||||
*
|
||||
* Returns:
|
||||
* True if there was fault credit, false otherwise
|
||||
*/
|
||||
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
|
||||
unsigned int pasid)
|
||||
{
|
||||
struct amdgpu_vm *vm;
|
||||
|
||||
spin_lock(&adev->vm_manager.pasid_lock);
|
||||
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
|
||||
if (!vm) {
|
||||
/* VM not found, can't track fault credit */
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* No lock needed. only accessed by IRQ handler */
|
||||
if (!vm->fault_credit) {
|
||||
/* Too many faults in this VM */
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
vm->fault_credit--;
|
||||
spin_unlock(&adev->vm_manager.pasid_lock);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_vm_manager_init - init the VM manager
|
||||
*
|
||||
|
|
|
@ -229,9 +229,6 @@ struct amdgpu_vm {
|
|||
/* Up to 128 pending retry page faults */
|
||||
DECLARE_KFIFO(faults, u64, 128);
|
||||
|
||||
/* Limit non-retry fault storms */
|
||||
unsigned int fault_credit;
|
||||
|
||||
/* Points to the KFD process VM info */
|
||||
struct amdkfd_process_info *process_info;
|
||||
|
||||
|
@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|||
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
|
||||
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
|
||||
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
|
||||
unsigned int pasid);
|
||||
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
||||
struct list_head *validated,
|
||||
struct amdgpu_bo_list_entry *entry);
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*/
|
||||
#include <linux/list.h>
|
||||
#include "amdgpu.h"
|
||||
#include "amdgpu_psp.h"
|
||||
#include "amdgpu_xgmi.h"
|
||||
|
||||
|
||||
static DEFINE_MUTEX(xgmi_mutex);
|
||||
|
@ -31,15 +31,16 @@ static DEFINE_MUTEX(xgmi_mutex);
|
|||
#define AMDGPU_MAX_XGMI_HIVE 8
|
||||
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
|
||||
|
||||
struct amdgpu_hive_info {
|
||||
uint64_t hive_id;
|
||||
struct list_head device_list;
|
||||
};
|
||||
|
||||
static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
|
||||
static unsigned hive_count = 0;
|
||||
|
||||
static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
|
||||
void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
|
||||
{
|
||||
return &hive->device_list;
|
||||
}
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
||||
{
|
||||
int i;
|
||||
struct amdgpu_hive_info *tmp;
|
||||
|
@ -58,62 +59,99 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
|
|||
tmp = &xgmi_hives[hive_count++];
|
||||
tmp->hive_id = adev->gmc.xgmi.hive_id;
|
||||
INIT_LIST_HEAD(&tmp->device_list);
|
||||
mutex_init(&tmp->hive_lock);
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
|
||||
/* Each psp need to set the latest topology */
|
||||
ret = psp_xgmi_set_topology_info(&adev->psp,
|
||||
hive->number_devices,
|
||||
&hive->topology_info);
|
||||
if (ret)
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
adev->gmc.xgmi.node_id,
|
||||
adev->gmc.xgmi.hive_id, ret);
|
||||
else
|
||||
dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
adev->gmc.xgmi.hive_id);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
|
||||
{
|
||||
struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
|
||||
struct psp_xgmi_topology_info *hive_topology;
|
||||
struct amdgpu_hive_info *hive;
|
||||
struct amdgpu_xgmi *entry;
|
||||
struct amdgpu_device *tmp_adev;
|
||||
struct amdgpu_device *tmp_adev = NULL;
|
||||
|
||||
int count = 0, ret = -EINVAL;
|
||||
|
||||
if ((adev->asic_type < CHIP_VEGA20) ||
|
||||
(adev->flags & AMD_IS_APU) )
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return 0;
|
||||
adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
|
||||
|
||||
adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
|
||||
adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
|
||||
|
||||
memset(&tmp_topology[0], 0, sizeof(tmp_topology));
|
||||
mutex_lock(&xgmi_mutex);
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (!hive)
|
||||
goto exit;
|
||||
|
||||
hive_topology = &hive->topology_info;
|
||||
|
||||
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
|
||||
list_for_each_entry(entry, &hive->device_list, head)
|
||||
tmp_topology[count++].device_id = entry->device_id;
|
||||
hive_topology->nodes[count++].node_id = entry->node_id;
|
||||
hive->number_devices = count;
|
||||
|
||||
ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
|
||||
if (ret) {
|
||||
dev_err(adev->dev,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
adev->gmc.xgmi.device_id,
|
||||
adev->gmc.xgmi.hive_id, ret);
|
||||
goto exit;
|
||||
}
|
||||
/* Each psp need to set the latest topology */
|
||||
/* Each psp need to get the latest topology */
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
|
||||
ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
|
||||
if (ret) {
|
||||
dev_err(tmp_adev->dev,
|
||||
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.device_id,
|
||||
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
|
||||
tmp_adev->gmc.xgmi.node_id,
|
||||
tmp_adev->gmc.xgmi.hive_id, ret);
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
/* To do : continue with some node failed or disable the whole hive */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ret)
|
||||
dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
|
||||
adev->gmc.xgmi.physical_node_id,
|
||||
adev->gmc.xgmi.hive_id);
|
||||
|
||||
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
|
||||
ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_hive_info *hive;
|
||||
|
||||
if (!adev->gmc.xgmi.supported)
|
||||
return;
|
||||
|
||||
mutex_lock(&xgmi_mutex);
|
||||
|
||||
hive = amdgpu_get_xgmi_hive(adev);
|
||||
if (!hive)
|
||||
goto exit;
|
||||
|
||||
if (!(hive->number_devices--))
|
||||
mutex_destroy(&hive->hive_lock);
|
||||
|
||||
exit:
|
||||
mutex_unlock(&xgmi_mutex);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __AMDGPU_XGMI_H__
|
||||
#define __AMDGPU_XGMI_H__
|
||||
|
||||
#include "amdgpu_psp.h"
|
||||
|
||||
struct amdgpu_hive_info {
|
||||
uint64_t hive_id;
|
||||
struct list_head device_list;
|
||||
struct psp_xgmi_topology_info topology_info;
|
||||
int number_devices;
|
||||
struct mutex hive_lock;
|
||||
};
|
||||
|
||||
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
|
||||
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
|
||||
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
|
||||
|
||||
#endif
|
|
@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
|
|||
|
||||
if (pi->caps_sq_ramping || pi->caps_db_ramping ||
|
||||
pi->caps_td_ramping || pi->caps_tcp_ramping) {
|
||||
adev->gfx.rlc.funcs->enter_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_enter_safe_mode(adev);
|
||||
|
||||
if (enable) {
|
||||
ret = ci_program_pt_config_registers(adev, didt_config_ci);
|
||||
if (ret) {
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ci_do_enable_didt(adev, enable);
|
||||
|
||||
adev->gfx.rlc.funcs->exit_safe_mode(adev);
|
||||
amdgpu_gfx_rlc_exit_safe_mode(adev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -1755,6 +1755,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
|
|||
.flush_hdp = &cik_flush_hdp,
|
||||
.invalidate_hdp = &cik_invalidate_hdp,
|
||||
.need_full_reset = &cik_need_full_reset,
|
||||
.init_doorbell_index = &legacy_doorbell_index_init,
|
||||
};
|
||||
|
||||
static int cik_common_early_init(void *handle)
|
||||
|
|
|
@ -30,4 +30,5 @@ void cik_srbm_select(struct amdgpu_device *adev,
|
|||
u32 me, u32 pipe, u32 queue, u32 vmid);
|
||||
int cik_set_ip_blocks(struct amdgpu_device *adev);
|
||||
|
||||
void legacy_doorbell_index_init(struct amdgpu_device *adev);
|
||||
#endif
|
||||
|
|
|
@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
|
|||
* [127:96] - reserved
|
||||
*/
|
||||
|
||||
/**
|
||||
* cik_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* cik_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
|
@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
|
|||
|
||||
static const struct amdgpu_ih_funcs cik_ih_funcs = {
|
||||
.get_wptr = cik_ih_get_wptr,
|
||||
.prescreen_iv = cik_ih_prescreen_iv,
|
||||
.decode_iv = cik_ih_decode_iv,
|
||||
.set_rptr = cik_ih_set_rptr
|
||||
};
|
||||
|
|
|
@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
|
|||
|
||||
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
|
@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
|
|||
* Schedule an IB in the DMA ring (CIK).
|
||||
*/
|
||||
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 extra_bits = vmid & 0xf;
|
||||
|
||||
/* IB packet must end on a 8 DW boundary */
|
||||
|
@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
|
|||
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
|
||||
}
|
||||
sdma0->ready = false;
|
||||
sdma1->ready = false;
|
||||
sdma0->sched.ready = false;
|
||||
sdma1->sched.ready = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
|
|||
/* enable DMA IBs */
|
||||
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
|
||||
|
||||
ring->ready = true;
|
||||
ring->sched.ready = true;
|
||||
}
|
||||
|
||||
cik_sdma_enable(adev, true);
|
||||
|
||||
for (i = 0; i < adev->sdma.num_instances; i++) {
|
||||
ring = &adev->sdma.instance[i].ring;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (adev->mman.buffer_funcs_ring == ring)
|
||||
amdgpu_ttm_set_buffer_funcs_status(adev, true);
|
||||
|
@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
|
|||
u64 gpu_addr;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 5);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_wb;
|
||||
|
||||
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
|
||||
amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
|
||||
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
|
||||
|
@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
|
|||
DRM_UDELAY(1);
|
||||
}
|
||||
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
|
||||
ring->idx, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_wb:
|
||||
amdgpu_device_wb_free(adev, index);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
long r;
|
||||
|
||||
r = amdgpu_device_wb_get(adev, &index);
|
||||
if (r) {
|
||||
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
gpu_addr = adev->wb.gpu_addr + (index * 4);
|
||||
tmp = 0xCAFEDEAD;
|
||||
adev->wb.wb[index] = cpu_to_le32(tmp);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err0;
|
||||
}
|
||||
|
||||
ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
|
||||
SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
|
||||
|
@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err1;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err1;
|
||||
}
|
||||
tmp = le32_to_cpu(adev->wb.wb[index]);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err1:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
|
@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
|
|||
*/
|
||||
static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
|
||||
{
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
|
||||
struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
|
||||
u32 pad_count;
|
||||
int i;
|
||||
|
||||
|
@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
|
|||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
u8 instance_id;
|
||||
|
||||
DRM_ERROR("Illegal instruction in SDMA command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
instance_id = (entry->ring_id & 0x3) >> 0;
|
||||
drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -207,34 +207,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
|
|||
return (wptr & adev->irq.ih.ptr_mask);
|
||||
}
|
||||
|
||||
/**
|
||||
* cz_ih_prescreen_iv - prescreen an interrupt vector
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Returns true if the interrupt vector should be further processed.
|
||||
*/
|
||||
static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
|
||||
{
|
||||
u32 ring_index = adev->irq.ih.rptr >> 2;
|
||||
u16 pasid;
|
||||
|
||||
switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
|
||||
case 146:
|
||||
case 147:
|
||||
pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
|
||||
if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
|
||||
return true;
|
||||
break;
|
||||
default:
|
||||
/* Not a VM fault */
|
||||
return true;
|
||||
}
|
||||
|
||||
adev->irq.ih.rptr += 16;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* cz_ih_decode_iv - decode an interrupt vector
|
||||
*
|
||||
|
@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
|
|||
|
||||
static const struct amdgpu_ih_funcs cz_ih_funcs = {
|
||||
.get_wptr = cz_ih_get_wptr,
|
||||
.prescreen_iv = cz_ih_prescreen_iv,
|
||||
.decode_iv = cz_ih_decode_iv,
|
||||
.set_rptr = cz_ih_set_rptr
|
||||
};
|
||||
|
|
|
@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
|||
int r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
|
||||
r = amdgpu_ring_alloc(ring, 3);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
if (r)
|
||||
goto error_free_scratch;
|
||||
|
||||
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START));
|
||||
amdgpu_ring_write(ring, 0xDEADBEEF);
|
||||
|
@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
|
|||
break;
|
||||
DRM_UDELAY(1);
|
||||
}
|
||||
if (i < adev->usec_timeout) {
|
||||
DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
ring->idx, scratch, tmp);
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
if (i >= adev->usec_timeout)
|
||||
r = -ETIMEDOUT;
|
||||
|
||||
error_free_scratch:
|
||||
amdgpu_gfx_scratch_free(adev, scratch);
|
||||
return r;
|
||||
}
|
||||
|
@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
|
|||
}
|
||||
|
||||
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
|
||||
struct amdgpu_job *job,
|
||||
struct amdgpu_ib *ib,
|
||||
unsigned vmid, bool ctx_switch)
|
||||
bool ctx_switch)
|
||||
{
|
||||
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
|
||||
u32 header, control = 0;
|
||||
|
||||
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
|
||||
|
@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
long r;
|
||||
|
||||
r = amdgpu_gfx_scratch_get(adev, &scratch);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
WREG32(scratch, 0xCAFEDEAD);
|
||||
memset(&ib, 0, sizeof(ib));
|
||||
r = amdgpu_ib_get(adev, NULL, 256, &ib);
|
||||
if (r) {
|
||||
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
|
||||
if (r)
|
||||
goto err1;
|
||||
}
|
||||
|
||||
ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1);
|
||||
ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START));
|
||||
ib.ptr[2] = 0xDEADBEEF;
|
||||
|
@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
|
|||
|
||||
r = dma_fence_wait_timeout(f, false, timeout);
|
||||
if (r == 0) {
|
||||
DRM_ERROR("amdgpu: IB test timed out\n");
|
||||
r = -ETIMEDOUT;
|
||||
goto err2;
|
||||
} else if (r < 0) {
|
||||
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
|
||||
goto err2;
|
||||
}
|
||||
tmp = RREG32(scratch);
|
||||
if (tmp == 0xDEADBEEF) {
|
||||
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
|
||||
if (tmp == 0xDEADBEEF)
|
||||
r = 0;
|
||||
} else {
|
||||
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
|
||||
scratch, tmp);
|
||||
else
|
||||
r = -EINVAL;
|
||||
}
|
||||
|
||||
err2:
|
||||
amdgpu_ib_free(adev, &ib, NULL);
|
||||
|
@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
|
|||
CP_ME_CNTL__CE_HALT_MASK));
|
||||
WREG32(mmSCRATCH_UMSK, 0);
|
||||
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
|
||||
adev->gfx.gfx_ring[i].ready = false;
|
||||
adev->gfx.gfx_ring[i].sched.ready = false;
|
||||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
adev->gfx.compute_ring[i].ready = false;
|
||||
adev->gfx.compute_ring[i].sched.ready = false;
|
||||
}
|
||||
udelay(50);
|
||||
}
|
||||
|
@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
|
|||
|
||||
/* start the rings */
|
||||
gfx_v6_0_cp_gfx_start(adev);
|
||||
ring->ready = true;
|
||||
r = amdgpu_ring_test_ring(ring);
|
||||
if (r) {
|
||||
ring->ready = false;
|
||||
r = amdgpu_ring_test_helper(ring);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
|
|||
WREG32(mmCP_RB2_CNTL, tmp);
|
||||
WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
|
||||
|
||||
adev->gfx.compute_ring[0].ready = false;
|
||||
adev->gfx.compute_ring[1].ready = false;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
|
||||
r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]);
|
||||
if (r)
|
||||
return r;
|
||||
adev->gfx.compute_ring[i].ready = true;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
|
|||
amdgpu_ring_write(ring, val);
|
||||
}
|
||||
|
||||
static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
|
||||
{
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
|
||||
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
|
||||
}
|
||||
|
||||
static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
||||
{
|
||||
const u32 *src_ptr;
|
||||
volatile u32 *dst_ptr;
|
||||
u32 dws, i;
|
||||
u32 dws;
|
||||
u64 reg_list_mc_addr;
|
||||
const struct cs_section_def *cs_data;
|
||||
int r;
|
||||
|
@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
|||
cs_data = adev->gfx.rlc.cs_data;
|
||||
|
||||
if (src_ptr) {
|
||||
/* save restore block */
|
||||
r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
|
||||
AMDGPU_GEM_DOMAIN_VRAM,
|
||||
&adev->gfx.rlc.save_restore_obj,
|
||||
&adev->gfx.rlc.save_restore_gpu_addr,
|
||||
(void **)&adev->gfx.rlc.sr_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC sr bo failed\n",
|
||||
r);
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
/* init save restore block */
|
||||
r = amdgpu_gfx_rlc_init_sr(adev, dws);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* write the sr buffer */
|
||||
dst_ptr = adev->gfx.rlc.sr_ptr;
|
||||
for (i = 0; i < adev->gfx.rlc.reg_list_size; i++)
|
||||
dst_ptr[i] = cpu_to_le32(src_ptr[i]);
|
||||
|
||||
amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj);
|
||||
amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj);
|
||||
}
|
||||
|
||||
if (cs_data) {
|
||||
|
@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev)
|
|||
(void **)&adev->gfx.rlc.cs_ptr);
|
||||
if (r) {
|
||||
dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
|
|||
if (!adev->gfx.rlc_fw)
|
||||
return -EINVAL;
|
||||
|
||||
gfx_v6_0_rlc_stop(adev);
|
||||
gfx_v6_0_rlc_reset(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
adev->gfx.rlc.funcs->reset(adev);
|
||||
gfx_v6_0_init_pg(adev);
|
||||
gfx_v6_0_init_cg(adev);
|
||||
|
||||
|
@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev)
|
|||
WREG32(mmRLC_UCODE_ADDR, 0);
|
||||
|
||||
gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev));
|
||||
gfx_v6_0_rlc_start(adev);
|
||||
adev->gfx.rlc.funcs->start(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
|
|||
.select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
|
||||
};
|
||||
|
||||
static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
|
||||
.init = gfx_v6_0_rlc_init,
|
||||
.resume = gfx_v6_0_rlc_resume,
|
||||
.stop = gfx_v6_0_rlc_stop,
|
||||
.reset = gfx_v6_0_rlc_reset,
|
||||
.start = gfx_v6_0_rlc_start
|
||||
};
|
||||
|
||||
static int gfx_v6_0_early_init(void *handle)
|
||||
{
|
||||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle)
|
|||
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
|
||||
adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS;
|
||||
adev->gfx.funcs = &gfx_v6_0_gfx_funcs;
|
||||
adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;
|
||||
gfx_v6_0_set_ring_funcs(adev);
|
||||
gfx_v6_0_set_irq_funcs(adev);
|
||||
|
||||
|
@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle)
|
|||
return r;
|
||||
}
|
||||
|
||||
r = gfx_v6_0_rlc_init(adev);
|
||||
r = adev->gfx.rlc.funcs->init(adev);
|
||||
if (r) {
|
||||
DRM_ERROR("Failed to init rlc BOs!\n");
|
||||
return r;
|
||||
|
@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle)
|
|||
for (i = 0; i < adev->gfx.num_compute_rings; i++)
|
||||
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
|
||||
|
||||
gfx_v6_0_rlc_fini(adev);
|
||||
amdgpu_gfx_rlc_fini(adev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle)
|
|||
|
||||
gfx_v6_0_constants_init(adev);
|
||||
|
||||
r = gfx_v6_0_rlc_resume(adev);
|
||||
r = adev->gfx.rlc.funcs->resume(adev);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle)
|
|||
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
|
||||
|
||||
gfx_v6_0_cp_enable(adev, false);
|
||||
gfx_v6_0_rlc_stop(adev);
|
||||
adev->gfx.rlc.funcs->stop(adev);
|
||||
gfx_v6_0_fini_pg(adev);
|
||||
|
||||
return 0;
|
||||
|
@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void gfx_v6_0_fault(struct amdgpu_device *adev,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
switch (entry->ring_id) {
|
||||
case 0:
|
||||
ring = &adev->gfx.gfx_ring[0];
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
ring = &adev->gfx.compute_ring[entry->ring_id - 1];
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
drm_sched_fault(&ring->sched);
|
||||
}
|
||||
|
||||
static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
|
||||
struct amdgpu_irq_src *source,
|
||||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal register access in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v6_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
|
|||
struct amdgpu_iv_entry *entry)
|
||||
{
|
||||
DRM_ERROR("Illegal instruction in command stream\n");
|
||||
schedule_work(&adev->reset_work);
|
||||
gfx_v6_0_fault(adev, entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue