From 46b97aed5484a3f44584a10f9e0691bf89d29064 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 5 Oct 2020 18:22:41 +0200 Subject: [PATCH 01/28] drm/mediatek: mtk_dpi: Fix unused variable 'mtk_dpi_encoder_funcs' Commit f89c696e7f63 ("drm/mediatek: mtk_dpi: Convert to bridge driver") introduced the following build warning with W=1 drivers/gpu/drm/mediatek/mtk_dpi.c:530:39: warning: unused variable 'mtk_dpi_encoder_funcs' [-Wunused-const-variable] static const struct drm_encoder_funcs mtk_dpi_encoder_funcs = { This struct is and the 'mtk_dpi_encoder_destroy()' are not needed anymore, so remove them. Fixes: f89c696e7f63 ("drm/mediatek: mtk_dpi: Convert to bridge driver") Reported-by: kernel test robot Signed-off-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index cf11c4850b40..52f11a63a330 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -522,15 +522,6 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, return 0; } -static void mtk_dpi_encoder_destroy(struct drm_encoder *encoder) -{ - drm_encoder_cleanup(encoder); -} - -static const struct drm_encoder_funcs mtk_dpi_encoder_funcs = { - .destroy = mtk_dpi_encoder_destroy, -}; - static int mtk_dpi_bridge_attach(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags) { From 63495f6b4aede26e6f8fe3da69e5cfdd8a4ccc3b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Oct 2020 13:25:21 +0100 Subject: [PATCH 02/28] drm/vc4: hdmi: Make sure our clock rate is within limits The HDMI controller cannot go above a certain pixel rate limit depending on the generations, but that limit is only enforced in mode_valid at the moment, which means that we won't advertise modes that exceed that limit, but the userspace is still free to try to setup a mode that would. Implement atomic_check to make sure we check it in that scenario too. Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20201029122522.1917579-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 95779d50cca0..600dd6d40309 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -760,6 +760,20 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) { } +static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); + unsigned long long pixel_rate = mode->clock * 1000; + + if (pixel_rate > vc4_hdmi->variant->max_pixel_clock) + return -EINVAL; + + return 0; +} + static enum drm_mode_status vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, const struct drm_display_mode *mode) @@ -773,6 +787,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, } static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = { + .atomic_check = vc4_hdmi_encoder_atomic_check, .mode_valid = vc4_hdmi_encoder_mode_valid, .disable = vc4_hdmi_encoder_disable, .enable = vc4_hdmi_encoder_enable, From 57fb32e632be4d406b4594829e3befdae1100c12 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Oct 2020 13:25:22 +0100 Subject: [PATCH 03/28] drm/vc4: hdmi: Block odd horizontal timings The FIFO between the pixelvalve and the HDMI controller runs at 2 pixels per clock cycle, and cannot deal with odd timings. Let's reject any mode with such timings. Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Link: https://patchwork.freedesktop.org/patch/msgid/20201029122522.1917579-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++++++++ drivers/gpu/drm/vc4/vc4_hdmi.h | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 600dd6d40309..4689a7338ac5 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -768,6 +768,11 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long long pixel_rate = mode->clock * 1000; + if (vc4_hdmi->variant->unsupported_odd_h_timings && + ((mode->hdisplay % 2) || (mode->hsync_start % 2) || + (mode->hsync_end % 2) || (mode->htotal % 2))) + return -EINVAL; + if (pixel_rate > vc4_hdmi->variant->max_pixel_clock) return -EINVAL; @@ -780,6 +785,11 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); + if (vc4_hdmi->variant->unsupported_odd_h_timings && + ((mode->hdisplay % 2) || (mode->hsync_start % 2) || + (mode->hsync_end % 2) || (mode->htotal % 2))) + return MODE_H_ILLEGAL; + if ((mode->clock * 1000) > vc4_hdmi->variant->max_pixel_clock) return MODE_CLOCK_HIGH; @@ -1832,6 +1842,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi0_variant = { PHY_LANE_2, PHY_LANE_CK, }, + .unsupported_odd_h_timings = true, .init_resources = vc5_hdmi_init_resources, .csc_setup = vc5_hdmi_csc_setup, @@ -1857,6 +1868,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = { PHY_LANE_CK, PHY_LANE_2, }, + .unsupported_odd_h_timings = true, .init_resources = vc5_hdmi_init_resources, .csc_setup = vc5_hdmi_csc_setup, diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 63c6f8bddf1d..6815e93b1a48 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -62,6 +62,9 @@ struct vc4_hdmi_variant { */ enum vc4_hdmi_phy_channel phy_lane_mapping[4]; + /* The BCM2711 cannot deal with odd horizontal pixel timings */ + bool unsupported_odd_h_timings; + /* Callback to get the resources (memory region, interrupts, * clocks, etc) for that variant. */ From 3c354ed1c43dabbdaae8569f982cdcccfdecd6a8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Nov 2020 14:56:50 +0100 Subject: [PATCH 04/28] drm/vc4: kms: Switch to drmm_add_action_or_reset Even though it was pointed in the review by Daniel, and I thought to have fixed it while applying the patches, but it turns out I forgot to commit the fixes in the process. Properly fix it this time. Fixes: dcda7c28bff2 ("drm/vc4: kms: Add functions to create the state objects") Signed-off-by: Maxime Ripard Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201105135656.383350-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 2b951cae04ad..44db31e16e91 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -113,7 +113,7 @@ static int vc4_ctm_obj_init(struct vc4_dev *vc4) drm_atomic_private_obj_init(&vc4->base, &vc4->ctm_manager, &ctm_state->base, &vc4_ctm_state_funcs); - return drmm_add_action(&vc4->base, vc4_ctm_obj_fini, NULL); + return drmm_add_action_or_reset(&vc4->base, vc4_ctm_obj_fini, NULL); } /* Converts a DRM S31.32 value to the HW S0.9 format. */ @@ -657,7 +657,7 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) &load_state->base, &vc4_load_tracker_state_funcs); - return drmm_add_action(&vc4->base, vc4_load_tracker_obj_fini, NULL); + return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } #define NUM_OUTPUTS 6 From 213189dbe7a1d7b1032aca4eacb0348a3ed67823 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Nov 2020 14:56:51 +0100 Subject: [PATCH 05/28] drm/vc4: kms: Remove useless define NUM_OUTPUTS isn't used anymore, let's remove it. Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201105135656.383350-3-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 44db31e16e91..4b558ccb18fe 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -660,7 +660,6 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } -#define NUM_OUTPUTS 6 #define NUM_CHANNELS 3 static int From a9661f27dc6bfbb6869b07cf68f9c2fd05167746 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Nov 2020 14:56:52 +0100 Subject: [PATCH 06/28] drm/vc4: kms: Rename NUM_CHANNELS The NUM_CHANNELS define has a pretty generic name and was right before the function using it. Let's move to something that makes the hardware-specific nature more obvious, and to a more appropriate place. Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201105135656.383350-4-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 4b558ccb18fe..ad69c70f66a2 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -24,6 +24,8 @@ #include "vc4_drv.h" #include "vc4_regs.h" +#define HVS_NUM_CHANNELS 3 + struct vc4_ctm_state { struct drm_private_state base; struct drm_color_ctm *ctm; @@ -660,12 +662,10 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } -#define NUM_CHANNELS 3 - static int vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) { - unsigned long unassigned_channels = GENMASK(NUM_CHANNELS - 1, 0); + unsigned long unassigned_channels = GENMASK(HVS_NUM_CHANNELS - 1, 0); struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; int i, ret; From a72b0458cd5123b40dd5084f6e536af63aeacda1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Nov 2020 14:56:53 +0100 Subject: [PATCH 07/28] drm/vc4: kms: Split the HVS muxing check in a separate function The code that assigns HVS channels during atomic_check is starting to grow a bit big, let's move it into a separate function. Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201105135656.383350-5-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ad69c70f66a2..bb2efc5d2d01 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -662,13 +662,13 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } -static int -vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +static int vc4_pv_muxing_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) { unsigned long unassigned_channels = GENMASK(HVS_NUM_CHANNELS - 1, 0); struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; - int i, ret; + unsigned int i; /* * Since the HVS FIFOs are shared across all the pixelvalves and @@ -741,6 +741,18 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) } } + return 0; +} + +static int +vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + int ret; + + ret = vc4_pv_muxing_atomic_check(dev, state); + if (ret) + return ret; + ret = vc4_ctm_atomic_check(dev, state); if (ret < 0) return ret; From b5dbc4d36885bef6257054a737a76101d293b185 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 5 Nov 2020 14:56:54 +0100 Subject: [PATCH 08/28] drm/vc4: kms: Document the muxing corner cases We've had a number of muxing corner-cases with specific ways to reproduce them, so let's document them to make sure they aren't lost and introduce regressions later on. Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201105135656.383350-6-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_kms.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index bb2efc5d2d01..0f44fc526fd2 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -662,6 +662,28 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } +/* + * The BCM2711 HVS has up to 7 outputs connected to the pixelvalves and + * the TXP (and therefore all the CRTCs found on that platform). + * + * The naive (and our initial) implementation would just iterate over + * all the active CRTCs, try to find a suitable FIFO, and then remove it + * from the pool of available FIFOs. However, there are a few corner + * cases that need to be considered: + * + * - When running in a dual-display setup (so with two CRTCs involved), + * we can update the state of a single CRTC (for example by changing + * its mode using xrandr under X11) without affecting the other. In + * this case, the other CRTC wouldn't be in the state at all, so we + * need to consider all the running CRTCs in the DRM device to assign + * a FIFO, not just the one in the state. + * + * - Since we need the pixelvalve to be disabled and enabled back when + * the FIFO is changed, we should keep the FIFO assigned for as long + * as the CRTC is enabled, only considering it free again once that + * CRTC has been disabled. This can be tested by booting X11 on a + * single display, and changing the resolution down and then back up. + */ static int vc4_pv_muxing_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) { From 8d15aa4ed02bed2f5b0720480ab8eb032dc0887e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Oct 2020 14:40:16 +0100 Subject: [PATCH 09/28] dt-bindings: display: Add a property to deal with WiFi coexistence The RaspberryPi4 has both a WiFi chip and HDMI outputs capable of doing 4k. Unfortunately, the 1440p resolution at 60Hz has a TMDS rate on the HDMI cable right in the middle of the first Wifi channel. Add a property to our HDMI controller, that could be reused by other similar HDMI controllers, to allow the OS to take whatever measure is necessary to avoid that crosstalk. Signed-off-by: Maxime Ripard Reviewed-by: Nicolas Saenz Julienne Reviewed-by: Rob Herring Link: https://patchwork.freedesktop.org/patch/msgid/20201029134018.1948636-1-maxime@cerno.tech --- .../devicetree/bindings/display/brcm,bcm2711-hdmi.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml index 03a76729d26c..7ce06f9f9f8e 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml @@ -76,6 +76,12 @@ properties: resets: maxItems: 1 + wifi-2.4ghz-coexistence: + type: boolean + description: > + Should the pixel frequencies in the WiFi frequencies range be + avoided? + required: - compatible - reg From 9fa1d7e60ad5ad2f7859ea8912d7b0b57821a5b7 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Oct 2020 14:40:17 +0100 Subject: [PATCH 10/28] drm/vc4: hdmi: Disable Wifi Frequencies There's cross-talk on the RPi4 between the 2.4GHz channels used by the WiFi chip and some resolutions, most notably 1440p at 60Hz. In such a case, we can either reject entirely the mode, or lower slightly the pixel frequency to remove the overlap. Let's go for the latter. Signed-off-by: Maxime Ripard Acked-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201029134018.1948636-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hdmi.c | 21 +++++++++++++++++++++ drivers/gpu/drm/vc4/vc4_hdmi.h | 8 ++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 4689a7338ac5..afc178b0d89f 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -760,6 +760,9 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) { } +#define WIFI_2_4GHz_CH1_MIN_FREQ 2400000000ULL +#define WIFI_2_4GHz_CH1_MAX_FREQ 2422000000ULL + static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) @@ -767,12 +770,27 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long long pixel_rate = mode->clock * 1000; + unsigned long long tmds_rate; if (vc4_hdmi->variant->unsupported_odd_h_timings && ((mode->hdisplay % 2) || (mode->hsync_start % 2) || (mode->hsync_end % 2) || (mode->htotal % 2))) return -EINVAL; + /* + * The 1440p@60 pixel rate is in the same range than the first + * WiFi channel (between 2.4GHz and 2.422GHz with 22MHz + * bandwidth). Slightly lower the frequency to bring it out of + * the WiFi range. + */ + tmds_rate = pixel_rate * 10; + if (vc4_hdmi->disable_wifi_frequencies && + (tmds_rate >= WIFI_2_4GHz_CH1_MIN_FREQ && + tmds_rate <= WIFI_2_4GHz_CH1_MAX_FREQ)) { + mode->clock = 238560; + pixel_rate = mode->clock * 1000; + } + if (pixel_rate > vc4_hdmi->variant->max_pixel_clock) return -EINVAL; @@ -1719,6 +1737,9 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; } + vc4_hdmi->disable_wifi_frequencies = + of_property_read_bool(dev->of_node, "wifi-2.4ghz-coexistence"); + pm_runtime_enable(dev); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 6815e93b1a48..0526a9cf608a 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -142,6 +142,14 @@ struct vc4_hdmi { int hpd_gpio; bool hpd_active_low; + /* + * On some systems (like the RPi4), some modes are in the same + * frequency range than the WiFi channels (1440p@60Hz for + * example). Should we take evasive actions because that system + * has a wifi adapter? + */ + bool disable_wifi_frequencies; + struct cec_adapter *cec_adap; struct cec_msg cec_rx_msg; bool cec_tx_ok; From 487778f8d22fcdebb6436f0a5f96484ffa237b0b Mon Sep 17 00:00:00 2001 From: CK Hu Date: Fri, 13 Nov 2020 11:49:07 +0800 Subject: [PATCH 11/28] drm/mediatek: dsi: Modify horizontal front/back porch byte formula In the patch to be fixed, horizontal_backporch_byte become too large for some panel, so roll back that patch. For small hfp or hbp panel, using vm->hfront_porch + vm->hback_porch to calculate horizontal_backporch_byte would make it negtive, so use horizontal_backporch_byte itself to make it positive. Fixes: 35bf948f1edb ("drm/mediatek: dsi: Fix scrolling of panel with small hfp or hbp") Signed-off-by: CK Hu Signed-off-by: Chun-Kuang Hu Tested-by: Bilal Wasim --- drivers/gpu/drm/mediatek/mtk_dsi.c | 57 +++++++++++------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 4a188a942c38..65fd99c528af 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -444,7 +444,10 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) u32 horizontal_sync_active_byte; u32 horizontal_backporch_byte; u32 horizontal_frontporch_byte; + u32 horizontal_front_back_byte; + u32 data_phy_cycles_byte; u32 dsi_tmp_buf_bpp, data_phy_cycles; + u32 delta; struct mtk_phy_timing *timing = &dsi->phy_timing; struct videomode *vm = &dsi->vm; @@ -466,50 +469,30 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) horizontal_sync_active_byte = (vm->hsync_len * dsi_tmp_buf_bpp - 10); if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) - horizontal_backporch_byte = vm->hback_porch * dsi_tmp_buf_bpp; + horizontal_backporch_byte = vm->hback_porch * dsi_tmp_buf_bpp - 10; else horizontal_backporch_byte = (vm->hback_porch + vm->hsync_len) * - dsi_tmp_buf_bpp; + dsi_tmp_buf_bpp - 10; data_phy_cycles = timing->lpx + timing->da_hs_prepare + - timing->da_hs_zero + timing->da_hs_exit; + timing->da_hs_zero + timing->da_hs_exit + 3; - if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) { - if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp > - data_phy_cycles * dsi->lanes + 18) { - horizontal_frontporch_byte = - vm->hfront_porch * dsi_tmp_buf_bpp - - (data_phy_cycles * dsi->lanes + 18) * - vm->hfront_porch / - (vm->hfront_porch + vm->hback_porch); + delta = dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST ? 18 : 12; - horizontal_backporch_byte = - horizontal_backporch_byte - - (data_phy_cycles * dsi->lanes + 18) * - vm->hback_porch / - (vm->hfront_porch + vm->hback_porch); - } else { - DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); - horizontal_frontporch_byte = vm->hfront_porch * - dsi_tmp_buf_bpp; - } + horizontal_frontporch_byte = vm->hfront_porch * dsi_tmp_buf_bpp; + horizontal_front_back_byte = horizontal_frontporch_byte + horizontal_backporch_byte; + data_phy_cycles_byte = data_phy_cycles * dsi->lanes + delta; + + if (horizontal_front_back_byte > data_phy_cycles_byte) { + horizontal_frontporch_byte -= data_phy_cycles_byte * + horizontal_frontporch_byte / + horizontal_front_back_byte; + + horizontal_backporch_byte -= data_phy_cycles_byte * + horizontal_backporch_byte / + horizontal_front_back_byte; } else { - if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp > - data_phy_cycles * dsi->lanes + 12) { - horizontal_frontporch_byte = - vm->hfront_porch * dsi_tmp_buf_bpp - - (data_phy_cycles * dsi->lanes + 12) * - vm->hfront_porch / - (vm->hfront_porch + vm->hback_porch); - horizontal_backporch_byte = horizontal_backporch_byte - - (data_phy_cycles * dsi->lanes + 12) * - vm->hback_porch / - (vm->hfront_porch + vm->hback_porch); - } else { - DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); - horizontal_frontporch_byte = vm->hfront_porch * - dsi_tmp_buf_bpp; - } + DRM_WARN("HFP + HBP less than d-phy, FPS will under 60Hz\n"); } writel(horizontal_sync_active_byte, dsi->regs + DSI_HSA_WC); From e2d3d2e904ad3d381753798dcd5cae03e3c47242 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 16 Nov 2020 18:53:00 +0100 Subject: [PATCH 12/28] drm/exynos: depend on COMMON_CLK to fix compile tests The Exynos DRM uses Common Clock Framework thus it cannot be built on platforms without it (e.g. compile test on MIPS with RALINK and SOC_RT305X): /usr/bin/mips-linux-gnu-ld: drivers/gpu/drm/exynos/exynos_mixer.o: in function `mixer_bind': exynos_mixer.c:(.text+0x958): undefined reference to `clk_set_parent' Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 6417f374b923..951d5f708e92 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_EXYNOS tristate "DRM Support for Samsung SoC Exynos Series" - depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST) + depends on OF && DRM && COMMON_CLK + depends on ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM || COMPILE_TEST depends on MMU select DRM_KMS_HELPER select VIDEOMODE_HELPERS From f2df84e096a8254ddb18c531b185fc2a45879077 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 20 Nov 2020 15:42:44 +0100 Subject: [PATCH 13/28] drm/vc4: kms: Store the unassigned channel list in the state If a CRTC is enabled but not active, and that we're then doing a page flip on another CRTC, drm_atomic_get_crtc_state will bring the first CRTC state into the global state, and will make us wait for its vblank as well, even though that might never occur. Instead of creating the list of the free channels each time atomic_check is called, and calling drm_atomic_get_crtc_state to retrieve the allocated channels, let's create a private state object in the main atomic state, and use it to store the available channels. Since vc4 has a semaphore (with a value of 1, so a lock) in its commit implementation to serialize all the commits, even the nonblocking ones, we are free from the use-after-free race if two subsequent commits are not ran in their submission order. Fixes: 87ebcd42fb7b ("drm/vc4: crtc: Assign output to channel automatically") Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201120144245.398711-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_kms.c | 126 +++++++++++++++++++++++++++------- 2 files changed, 102 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 19b75bebd35f..fcfeef0949af 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -219,6 +219,7 @@ struct vc4_dev { struct drm_modeset_lock ctm_state_lock; struct drm_private_obj ctm_manager; + struct drm_private_obj hvs_channels; struct drm_private_obj load_tracker; /* List of vc4_debugfs_info_entry for adding to debugfs once diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 0f44fc526fd2..0bbd7b554275 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -37,6 +37,17 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv) return container_of(priv, struct vc4_ctm_state, base); } +struct vc4_hvs_state { + struct drm_private_state base; + unsigned int unassigned_channels; +}; + +static struct vc4_hvs_state * +to_vc4_hvs_state(struct drm_private_state *priv) +{ + return container_of(priv, struct vc4_hvs_state, base); +} + struct vc4_load_tracker_state { struct drm_private_state base; u64 hvs_load; @@ -171,6 +182,19 @@ vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO)); } +static struct vc4_hvs_state * +vc4_hvs_get_global_state(struct drm_atomic_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(state->dev); + struct drm_private_state *priv_state; + + priv_state = drm_atomic_get_private_obj_state(state, &vc4->hvs_channels); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); + + return to_vc4_hvs_state(priv_state); +} + static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) { @@ -662,6 +686,59 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) return drmm_add_action_or_reset(&vc4->base, vc4_load_tracker_obj_fini, NULL); } +static struct drm_private_state * +vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj) +{ + struct vc4_hvs_state *old_state = to_vc4_hvs_state(obj->state); + struct vc4_hvs_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + state->unassigned_channels = old_state->unassigned_channels; + + return &state->base; +} + +static void vc4_hvs_channels_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct vc4_hvs_state *hvs_state = to_vc4_hvs_state(state); + + kfree(hvs_state); +} + +static const struct drm_private_state_funcs vc4_hvs_state_funcs = { + .atomic_duplicate_state = vc4_hvs_channels_duplicate_state, + .atomic_destroy_state = vc4_hvs_channels_destroy_state, +}; + +static void vc4_hvs_channels_obj_fini(struct drm_device *dev, void *unused) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + drm_atomic_private_obj_fini(&vc4->hvs_channels); +} + +static int vc4_hvs_channels_obj_init(struct vc4_dev *vc4) +{ + struct vc4_hvs_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->unassigned_channels = GENMASK(HVS_NUM_CHANNELS - 1, 0); + drm_atomic_private_obj_init(&vc4->base, &vc4->hvs_channels, + &state->base, + &vc4_hvs_state_funcs); + + return drmm_add_action_or_reset(&vc4->base, vc4_hvs_channels_obj_fini, NULL); +} + /* * The BCM2711 HVS has up to 7 outputs connected to the pixelvalves and * the TXP (and therefore all the CRTCs found on that platform). @@ -678,6 +755,14 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) * need to consider all the running CRTCs in the DRM device to assign * a FIFO, not just the one in the state. * + * - To fix the above, we can't use drm_atomic_get_crtc_state on all + * enabled CRTCs to pull their CRTC state into the global state, since + * a page flip would start considering their vblank to complete. Since + * we don't have a guarantee that they are actually active, that + * vblank might never happen, and shouldn't even be considered if we + * want to do a page flip on a single CRTC. That can be tested by + * doing a modetest -v first on HDMI1 and then on HDMI0. + * * - Since we need the pixelvalve to be disabled and enabled back when * the FIFO is changed, we should keep the FIFO assigned for as long * as the CRTC is enabled, only considering it free again once that @@ -687,46 +772,33 @@ static int vc4_load_tracker_obj_init(struct vc4_dev *vc4) static int vc4_pv_muxing_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) { - unsigned long unassigned_channels = GENMASK(HVS_NUM_CHANNELS - 1, 0); + struct vc4_hvs_state *hvs_new_state; struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_crtc *crtc; unsigned int i; - /* - * Since the HVS FIFOs are shared across all the pixelvalves and - * the TXP (and thus all the CRTCs), we need to pull the current - * state of all the enabled CRTCs so that an update to a single - * CRTC still keeps the previous FIFOs enabled and assigned to - * the same CRTCs, instead of evaluating only the CRTC being - * modified. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_state *crtc_state; - - if (!crtc->state->enable) - continue; - - crtc_state = drm_atomic_get_crtc_state(state, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } + hvs_new_state = vc4_hvs_get_global_state(state); + if (!hvs_new_state) + return -EINVAL; for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct vc4_crtc_state *old_vc4_crtc_state = + to_vc4_crtc_state(old_crtc_state); struct vc4_crtc_state *new_vc4_crtc_state = to_vc4_crtc_state(new_crtc_state); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); unsigned int matching_channels; - if (old_crtc_state->enable && !new_crtc_state->enable) + if (old_crtc_state->enable && !new_crtc_state->enable) { + hvs_new_state->unassigned_channels |= BIT(old_vc4_crtc_state->assigned_channel); new_vc4_crtc_state->assigned_channel = VC4_HVS_CHANNEL_DISABLED; + } if (!new_crtc_state->enable) continue; - if (new_vc4_crtc_state->assigned_channel != VC4_HVS_CHANNEL_DISABLED) { - unassigned_channels &= ~BIT(new_vc4_crtc_state->assigned_channel); + if (new_vc4_crtc_state->assigned_channel != VC4_HVS_CHANNEL_DISABLED) continue; - } /* * The problem we have to solve here is that we have @@ -752,12 +824,12 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev, * the future, we will need to have something smarter, * but it works so far. */ - matching_channels = unassigned_channels & vc4_crtc->data->hvs_available_channels; + matching_channels = hvs_new_state->unassigned_channels & vc4_crtc->data->hvs_available_channels; if (matching_channels) { unsigned int channel = ffs(matching_channels) - 1; new_vc4_crtc_state->assigned_channel = channel; - unassigned_channels &= ~BIT(channel); + hvs_new_state->unassigned_channels &= ~BIT(channel); } else { return -EINVAL; } @@ -841,6 +913,10 @@ int vc4_kms_load(struct drm_device *dev) if (ret) return ret; + ret = vc4_hvs_channels_obj_init(vc4); + if (ret) + return ret; + drm_mode_config_reset(dev); drm_kms_helper_poll_init(dev); From 2820526dd5c27326d9c0d2c831a34b8f14e7c404 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 20 Nov 2020 15:42:45 +0100 Subject: [PATCH 14/28] drm/vc4: kms: Don't disable the muxing of an active CRTC The current HVS muxing code will consider the CRTCs in a given state to setup their muxing in the HVS, and disable the other CRTCs muxes. However, it's valid to only update a single CRTC with a state, and in this situation we would mux out a CRTC that was enabled but left untouched by the new state. Fix this by setting a flag on the CRTC state when the muxing has been changed, and only change the muxing configuration when that flag is there. Fixes: 87ebcd42fb7b ("drm/vc4: crtc: Assign output to channel automatically") Signed-off-by: Maxime Ripard Tested-by: Hoegeun Kwon Reviewed-by: Hoegeun Kwon Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20201120144245.398711-3-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_drv.h | 3 ++ drivers/gpu/drm/vc4/vc4_kms.c | 81 +++++++++++++++++++---------------- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index fcfeef0949af..c5f2944d5bc6 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -532,6 +532,9 @@ struct vc4_crtc_state { unsigned int top; unsigned int bottom; } margins; + + /* Transitional state below, only valid during atomic commits */ + bool update_muxing; }; #define VC4_HVS_CHANNEL_DISABLED ((unsigned int)-1) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 0bbd7b554275..ba310c0ab5f6 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -239,10 +239,7 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4, { struct drm_crtc_state *crtc_state; struct drm_crtc *crtc; - unsigned char dsp2_mux = 0; - unsigned char dsp3_mux = 3; - unsigned char dsp4_mux = 3; - unsigned char dsp5_mux = 3; + unsigned char mux; unsigned int i; u32 reg; @@ -250,50 +247,59 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4, struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); - if (!crtc_state->active) + if (!vc4_state->update_muxing) continue; switch (vc4_crtc->data->hvs_output) { case 2: - dsp2_mux = (vc4_state->assigned_channel == 2) ? 0 : 1; + mux = (vc4_state->assigned_channel == 2) ? 0 : 1; + reg = HVS_READ(SCALER_DISPECTRL); + HVS_WRITE(SCALER_DISPECTRL, + (reg & ~SCALER_DISPECTRL_DSP2_MUX_MASK) | + VC4_SET_FIELD(mux, SCALER_DISPECTRL_DSP2_MUX)); break; case 3: - dsp3_mux = vc4_state->assigned_channel; + if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED) + mux = 3; + else + mux = vc4_state->assigned_channel; + + reg = HVS_READ(SCALER_DISPCTRL); + HVS_WRITE(SCALER_DISPCTRL, + (reg & ~SCALER_DISPCTRL_DSP3_MUX_MASK) | + VC4_SET_FIELD(mux, SCALER_DISPCTRL_DSP3_MUX)); break; case 4: - dsp4_mux = vc4_state->assigned_channel; + if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED) + mux = 3; + else + mux = vc4_state->assigned_channel; + + reg = HVS_READ(SCALER_DISPEOLN); + HVS_WRITE(SCALER_DISPEOLN, + (reg & ~SCALER_DISPEOLN_DSP4_MUX_MASK) | + VC4_SET_FIELD(mux, SCALER_DISPEOLN_DSP4_MUX)); + break; case 5: - dsp5_mux = vc4_state->assigned_channel; + if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED) + mux = 3; + else + mux = vc4_state->assigned_channel; + + reg = HVS_READ(SCALER_DISPDITHER); + HVS_WRITE(SCALER_DISPDITHER, + (reg & ~SCALER_DISPDITHER_DSP5_MUX_MASK) | + VC4_SET_FIELD(mux, SCALER_DISPDITHER_DSP5_MUX)); break; default: break; } } - - reg = HVS_READ(SCALER_DISPECTRL); - HVS_WRITE(SCALER_DISPECTRL, - (reg & ~SCALER_DISPECTRL_DSP2_MUX_MASK) | - VC4_SET_FIELD(dsp2_mux, SCALER_DISPECTRL_DSP2_MUX)); - - reg = HVS_READ(SCALER_DISPCTRL); - HVS_WRITE(SCALER_DISPCTRL, - (reg & ~SCALER_DISPCTRL_DSP3_MUX_MASK) | - VC4_SET_FIELD(dsp3_mux, SCALER_DISPCTRL_DSP3_MUX)); - - reg = HVS_READ(SCALER_DISPEOLN); - HVS_WRITE(SCALER_DISPEOLN, - (reg & ~SCALER_DISPEOLN_DSP4_MUX_MASK) | - VC4_SET_FIELD(dsp4_mux, SCALER_DISPEOLN_DSP4_MUX)); - - reg = HVS_READ(SCALER_DISPDITHER); - HVS_WRITE(SCALER_DISPDITHER, - (reg & ~SCALER_DISPDITHER_DSP5_MUX_MASK) | - VC4_SET_FIELD(dsp5_mux, SCALER_DISPDITHER_DSP5_MUX)); } static void @@ -789,17 +795,20 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev, struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); unsigned int matching_channels; - if (old_crtc_state->enable && !new_crtc_state->enable) { + /* Nothing to do here, let's skip it */ + if (old_crtc_state->enable == new_crtc_state->enable) + continue; + + /* Muxing will need to be modified, mark it as such */ + new_vc4_crtc_state->update_muxing = true; + + /* If we're disabling our CRTC, we put back our channel */ + if (!new_crtc_state->enable) { hvs_new_state->unassigned_channels |= BIT(old_vc4_crtc_state->assigned_channel); new_vc4_crtc_state->assigned_channel = VC4_HVS_CHANNEL_DISABLED; + continue; } - if (!new_crtc_state->enable) - continue; - - if (new_vc4_crtc_state->assigned_channel != VC4_HVS_CHANNEL_DISABLED) - continue; - /* * The problem we have to solve here is that we have * up to 7 encoders, connected to up to 6 CRTCs. From 0305613dbcf42b6b27ddf516fea2738dfbfdb7c0 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 17 Nov 2020 15:01:24 +0200 Subject: [PATCH 15/28] drm/i915/perf: workaround register corruption in OATAILPTR After having written the entire OA buffer with reports, the HW will write again at the beginning of the OA buffer. It'll indicate it by setting the WRAP bits in the OASTATUS register. When a wrap happens and that at the end of the read vfunc we write the OASTATUS register back to clear the REPORT_LOST bit, we sometimes see that the OATAILPTR register is reset to a previous position on Gen8/9 (apparently not the case on Gen11+). This leads the next call to the read vfunc to process reports we've already read. Because we've marked those as read by clearing the reason & timestamp dwords, they're discarded and a "Skipping spurious, invalid OA report" message is emitted. The workaround to avoid this OATAILPTR value reset seems to be to set the wrap bits when writing back OASTATUS. This change has no impact on userspace, it only avoids a bunch of DRM_NOTE("Skipping spurious, invalid OA report\n") messages. Signed-off-by: Lionel Landwerlin Fixes: 19f81df2859eb1 ("drm/i915/perf: Add OA unit support for Gen 8+") Reviewed-by: Umesh Nerlige Ramappa Link: https://patchwork.freedesktop.org/patch/msgid/20201117130124.829979-1-lionel.g.landwerlin@intel.com (cherry picked from commit 059a0beb486344a577ff476acce75e69eab704be) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_perf.c | 9 +++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e94976976571..3640d0e229d2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -909,8 +909,13 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - intel_uncore_write(uncore, oastatus_reg, - oastatus & ~GEN8_OASTATUS_REPORT_LOST); + + intel_uncore_rmw(uncore, oastatus_reg, + GEN8_OASTATUS_COUNTER_OVERFLOW | + GEN8_OASTATUS_REPORT_LOST, + IS_GEN_RANGE(uncore->i915, 8, 10) ? + (GEN8_OASTATUS_HEAD_POINTER_WRAP | + GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0); } return gen8_append_oa_reports(stream, buf, count, offset); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 664f3bf9af03..5cd83eac940c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -676,6 +676,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN7_OASTATUS2_MEM_SELECT_GGTT (1 << 0) /* 0: PPGTT, 1: GGTT */ #define GEN8_OASTATUS _MMIO(0x2b08) +#define GEN8_OASTATUS_TAIL_POINTER_WRAP (1 << 17) +#define GEN8_OASTATUS_HEAD_POINTER_WRAP (1 << 16) #define GEN8_OASTATUS_OVERRUN_STATUS (1 << 3) #define GEN8_OASTATUS_COUNTER_OVERFLOW (1 << 2) #define GEN8_OASTATUS_OABUFFER_OVERFLOW (1 << 1) From b5e420f4595003c8c4669b2274bc5fa3856fc1be Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Thu, 10 Sep 2020 11:54:05 +0800 Subject: [PATCH 16/28] drm/i915/gvt: correct a false comment of flag F_UNALIGN Correct falsely removed comment of flag F_UNALIGN. Fixes: a6c5817a38cf ("drm/i915/gvt: remove flag F_CMD_ACCESSED") Reviewed-by: Zhenyu Wang Signed-off-by: Yan Zhao Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200910035405.20273-1-yan.y.zhao@intel.com (cherry picked from commit 6594094f819e0020e926e137e47e2edb97ba500b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 9831361f181e..a81cf0f01e78 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -255,7 +255,7 @@ struct intel_gvt_mmio { #define F_CMD_ACCESS (1 << 3) /* This reg has been accessed by a VM */ #define F_ACCESSED (1 << 4) -/* This reg has been accessed through GPU commands */ +/* This reg could be accessed by unaligned address */ #define F_UNALIGN (1 << 6) /* This reg is in GVT's mmio save-restor list and in hardware * logical context image From 08b49e14ec4f88f87a3a8443fca944dc2768066b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:14 +0000 Subject: [PATCH 17/28] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission Move the register slow register write and readback from out of the critical path for execlists submission and delay it until the following worker, shaving off around 200us. Note that the same signal_irq_work() is allowed to run concurrently on each CPU (but it will only be queued once, once running though it can be requeued and reexecuted) so we have to remember to lock the global interactions as we cannot rely on the signal_irq_work() itself providing the serialisation (in constrast to a tasklet). By pushing the arm/disarm into the central signaling worker we can close the race for disarming the interrupt (and dropping its associated GT wakeref) on parking the engine. If we loose the race, that GT wakeref may be held indefinitely, preventing the machine from sleeping while the GPU is ostensibly idle. v2: Move the self-arming parking of the signal_irq_work to a flush of the irq-work from intel_breadcrumbs_park(). Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2271 Fixes: e23005604b2f ("drm/i915/gt: Hold context/request reference while breadcrumbs are active") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-1-chris@chris-wilson.co.uk (cherry picked from commit 9d5612ca165a58aacc160465532e7998b9aab270) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 109 +++++++++++++------- 1 file changed, 70 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index d8b206e53660..8d85683314e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -30,18 +30,21 @@ #include "i915_trace.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" -static void irq_enable(struct intel_engine_cs *engine) +static bool irq_enable(struct intel_engine_cs *engine) { if (!engine->irq_enable) - return; + return false; /* Caller disables interrupts */ spin_lock(&engine->gt->irq_lock); engine->irq_enable(engine); spin_unlock(&engine->gt->irq_lock); + + return true; } static void irq_disable(struct intel_engine_cs *engine) @@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { - lockdep_assert_held(&b->irq_lock); - - if (!b->irq_engine || b->irq_armed) - return; - - if (!intel_gt_pm_get_if_awake(b->irq_engine->gt)) + /* + * Since we are waiting on a request, the GPU should be busy + * and should have its own rpm reference. + */ + if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt))) return; /* @@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) */ WRITE_ONCE(b->irq_armed, true); - /* - * Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. This is tracked - * by i915->gt.awake, we can forgo holding our own wakref - * for the interrupt as before i915->gt.awake is released (when - * the driver is idle) we disarm the breadcrumbs. - */ + /* Requests may have completed before we could enable the interrupt. */ + if (!b->irq_enabled++ && irq_enable(b->irq_engine)) + irq_work_queue(&b->irq_work); +} - if (!b->irq_enabled++) - irq_enable(b->irq_engine); +static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) +{ + if (!b->irq_engine) + return; + + spin_lock(&b->irq_lock); + if (!b->irq_armed) + __intel_breadcrumbs_arm_irq(b); + spin_unlock(&b->irq_lock); } static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { - lockdep_assert_held(&b->irq_lock); - - if (!b->irq_engine || !b->irq_armed) - return; - GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) irq_disable(b->irq_engine); @@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b, { intel_context_get(ce); list_add_tail(&ce->signal_link, &b->signalers); - if (list_is_first(&ce->signal_link, &b->signalers)) - __intel_breadcrumbs_arm_irq(b); } static void remove_signaling_context(struct intel_breadcrumbs *b, @@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work) spin_lock(&b->irq_lock); - if (list_empty(&b->signalers)) + /* + * Keep the irq armed until the interrupt after all listeners are gone. + * + * Enabling/disabling the interrupt is rather costly, roughly a couple + * of hundred microseconds. If we are proactive and enable/disable + * the interrupt around every request that wants a breadcrumb, we + * quickly drown in the extra orders of magnitude of latency imposed + * on request submission. + * + * So we try to be lazy, and keep the interrupts enabled until no + * more listeners appear within a breadcrumb interrupt interval (that + * is until a request completes that no one cares about). The + * observation is that listeners come in batches, and will often + * listen to a bunch of requests in succession. Though note on icl+, + * interrupts are always enabled due to concerns with rc6 being + * dysfunctional with per-engine interrupt masking. + * + * We also try to avoid raising too many interrupts, as they may + * be generated by userspace batches and it is unfortunately rather + * too easy to drown the CPU under a flood of GPU interrupts. Thus + * whenever no one appears to be listening, we turn off the interrupts. + * Fewer interrupts should conserve power -- at the very least, fewer + * interrupt draw less ire from other users of the system and tools + * like powertop. + */ + if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); list_splice_init(&b->signaled_requests, &signal); @@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work) i915_request_put(rq); } + + if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) + intel_breadcrumbs_arm_irq(b); } struct intel_breadcrumbs * @@ -292,21 +319,22 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) void intel_breadcrumbs_park(struct intel_breadcrumbs *b) { - unsigned long flags; - - if (!READ_ONCE(b->irq_armed)) - return; - - spin_lock_irqsave(&b->irq_lock, flags); - __intel_breadcrumbs_disarm_irq(b); - spin_unlock_irqrestore(&b->irq_lock, flags); - - if (!list_empty(&b->signalers)) - irq_work_queue(&b->irq_work); + /* Kick the work once more to drain the signalers */ + irq_work_sync(&b->irq_work); + while (unlikely(READ_ONCE(b->irq_armed))) { + local_irq_disable(); + signal_irq_work(&b->irq_work); + local_irq_enable(); + cond_resched(); + } + GEM_BUG_ON(!list_empty(&b->signalers)); } void intel_breadcrumbs_free(struct intel_breadcrumbs *b) { + irq_work_sync(&b->irq_work); + GEM_BUG_ON(!list_empty(&b->signalers)); + GEM_BUG_ON(b->irq_armed); kfree(b); } @@ -362,9 +390,12 @@ static void insert_breadcrumb(struct i915_request *rq, GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - /* Check after attaching to irq, interrupt may have already fired. */ - if (__request_completed(rq)) - irq_work_queue(&b->irq_work); + /* + * Defer enabling the interrupt to after HW submission and recheck + * the request as it may have completed and raised the interrupt as + * we were attaching it into the lists. + */ + irq_work_queue(&b->irq_work); } bool i915_request_enable_breadcrumb(struct i915_request *rq) From 7acc79eb5f78d3d1aa5dd21fc0a0329f1b7f2be5 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 17 Nov 2020 21:10:59 +0800 Subject: [PATCH 18/28] drm/amd/amdgpu: fix null pointer in runtime pm fix the null pointer issue when runtime pm is triggered. Signed-off-by: Kenneth Feng Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e3783f5a459d..026789b466db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4852,7 +4852,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && ras->supported) + if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); return amdgpu_dpm_baco_enter(adev); @@ -4871,7 +4871,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && ras->supported) + if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; From 4d6a95366117b241bb3298e1c318a36ebb7544d0 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Fri, 6 Nov 2020 16:42:47 -0500 Subject: [PATCH 19/28] drm/amdgpu: fix SI UVD firmware validate resume fail The SI UVD firmware validate key is stored at the end of firmware, which is changed during resume while playing video. So get the key at sw_init and store it for fw validate using. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 5eb63288d157..edbb8194ee81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -67,6 +67,7 @@ struct amdgpu_uvd { unsigned harvest_config; /* store image width to adjust nb memory state */ unsigned decode_image_width; + uint32_t keyselect; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 7cf4b11a65c5..3a5dce634cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -277,15 +277,8 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) */ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) { - void *ptr; - uint32_t ucode_len, i; - uint32_t keysel; - - ptr = adev->uvd.inst[0].cpu_addr; - ptr += 192 + 16; - memcpy(&ucode_len, ptr, 4); - ptr += ucode_len; - memcpy(&keysel, ptr, 4); + int i; + uint32_t keysel = adev->uvd.keyselect; WREG32(mmUVD_FW_START, keysel); @@ -550,6 +543,8 @@ static int uvd_v3_1_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + void *ptr; + uint32_t ucode_len; /* UVD TRAP */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); @@ -560,6 +555,13 @@ static int uvd_v3_1_sw_init(void *handle) if (r) return r; + /* Retrieval firmware validate key */ + ptr = adev->uvd.inst[0].cpu_addr; + ptr += 192 + 16; + memcpy(&ucode_len, ptr, 4); + ptr += ucode_len; + memcpy(&adev->uvd.keyselect, ptr, 4); + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, From dbbf2728d50343b7947001a81f4c8cc98e4b44e5 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Fri, 20 Nov 2020 02:38:09 -0500 Subject: [PATCH 20/28] drm/amdgpu: fix a page fault The UVD firmware is copied to cpu addr in uvd_resume, so it should be used after that. This is to fix a bug introduced by patch drm/amdgpu: fix SI UVD firmware validate resume fail. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher CC: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 3a5dce634cda..41800fcad410 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -555,13 +555,6 @@ static int uvd_v3_1_sw_init(void *handle) if (r) return r; - /* Retrieval firmware validate key */ - ptr = adev->uvd.inst[0].cpu_addr; - ptr += 192 + 16; - memcpy(&ucode_len, ptr, 4); - ptr += ucode_len; - memcpy(&adev->uvd.keyselect, ptr, 4); - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0, @@ -573,6 +566,13 @@ static int uvd_v3_1_sw_init(void *handle) if (r) return r; + /* Retrieval firmware validate key */ + ptr = adev->uvd.inst[0].cpu_addr; + ptr += 192 + 16; + memcpy(&ucode_len, ptr, 4); + ptr += ucode_len; + memcpy(&adev->uvd.keyselect, ptr, 4); + r = amdgpu_uvd_entity_init(adev); return r; From eb0104ee498d7f83ff98b8783181613685b8df6e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:15 +0000 Subject: [PATCH 21/28] drm/i915/gt: Track signaled breadcrumbs outside of the breadcrumb spinlock Make b->signaled_requests a lockless-list so that we can manipulate it outside of the b->irq_lock. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-2-chris@chris-wilson.co.uk (cherry picked from commit 6cfe66eb71b638968350b5f0fff051fd25eb75fb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 34 ++++++++++++------- .../gpu/drm/i915/gt/intel_breadcrumbs_types.h | 2 +- drivers/gpu/drm/i915/i915_request.h | 6 +++- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 8d85683314e1..43cfabb102ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -173,26 +173,34 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(b->irq_engine, tl); } -static bool __signal_request(struct i915_request *rq, struct list_head *signals) +static bool __signal_request(struct i915_request *rq) { - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - if (!__dma_fence_signal(&rq->fence)) { i915_request_put(rq); return false; } - list_add_tail(&rq->signal_link, signals); return true; } +static struct llist_node * +slist_add(struct llist_node *node, struct llist_node *head) +{ + node->next = head; + return node; +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); const ktime_t timestamp = ktime_get(); + struct llist_node *signal, *sn; struct intel_context *ce, *cn; struct list_head *pos, *next; - LIST_HEAD(signal); + + signal = NULL; + if (unlikely(!llist_empty(&b->signaled_requests))) + signal = llist_del_all(&b->signaled_requests); spin_lock(&b->irq_lock); @@ -224,8 +232,6 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); - list_splice_init(&b->signaled_requests, &signal); - list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -242,7 +248,10 @@ static void signal_irq_work(struct irq_work *work) * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - __signal_request(rq, &signal); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + if (__signal_request(rq)) + /* We own signal_node now, xfer to local list */ + signal = slist_add(&rq->signal_node, signal); } /* @@ -262,9 +271,9 @@ static void signal_irq_work(struct irq_work *work) spin_unlock(&b->irq_lock); - list_for_each_safe(pos, next, &signal) { + llist_for_each_safe(signal, sn, signal) { struct i915_request *rq = - list_entry(pos, typeof(*rq), signal_link); + llist_entry(signal, typeof(*rq), signal_node); struct list_head cb_list; spin_lock(&rq->lock); @@ -291,7 +300,7 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); - INIT_LIST_HEAD(&b->signaled_requests); + init_llist_head(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); @@ -355,7 +364,8 @@ static void insert_breadcrumb(struct i915_request *rq, * its signal completion. */ if (__request_completed(rq)) { - if (__signal_request(rq, &b->signaled_requests)) + if (__signal_request(rq) && + llist_add(&rq->signal_node, &b->signaled_requests)) irq_work_queue(&b->irq_work); return; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 8e53b9942695..3fa19820b37a 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -35,7 +35,7 @@ struct intel_breadcrumbs { struct intel_engine_cs *irq_engine; struct list_head signalers; - struct list_head signaled_requests; + struct llist_head signaled_requests; struct irq_work irq_work; /* for use from inside irq_lock */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 16b721080195..874af6db6103 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -176,7 +176,11 @@ struct i915_request { struct intel_context *context; struct intel_ring *ring; struct intel_timeline __rcu *timeline; - struct list_head signal_link; + + union { + struct list_head signal_link; + struct llist_node signal_node; + }; /* * The rcu epoch of when this request was allocated. Used to judiciously From 2e6ce8313a53b757b28b288bf4bb930df786e899 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:16 +0000 Subject: [PATCH 22/28] drm/i915/gt: Don't cancel the interrupt shadow too early We currently want to keep the interrupt enabled until the interrupt after which we have no more work to do. This heuristic was broken by us kicking the irq-work on adding a completed request without attaching a signaler -- hence it appearing to the irq-worker that an interrupt had fired when we were idle. Fixes: 2854d866327a ("drm/i915/gt: Replace intel_engine_transfer_stale_breadcrumbs") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-3-chris@chris-wilson.co.uk (cherry picked from commit 3aef910d26ef48b8a79d48b006dc04383b86dd31) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 43cfabb102ea..cf6e05ea4d8f 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -229,7 +229,7 @@ static void signal_irq_work(struct irq_work *work) * interrupt draw less ire from other users of the system and tools * like powertop. */ - if (b->irq_armed && list_empty(&b->signalers)) + if (!signal && b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { From 280ffdb6ddb5de85eddd476a3bcdc19c9a80f089 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Nov 2020 11:37:17 +0000 Subject: [PATCH 23/28] drm/i915/gt: Free stale request on destroying the virtual engine Since preempt-to-busy, we may unsubmit a request while it is still on the HW and completes asynchronously. That means it may be retired and in the process destroy the virtual engine (as the user has closed their context), but that engine may still be holding onto the unsubmitted compelted request. Therefore we need to potentially cleanup the old request on destroying the virtual engine. We also have to keep the virtual_engine alive until after the sibling's execlists_dequeue() have finished peeking into the virtual engines, for which we serialise with RCU. v2: Be paranoid and flush the tasklet as well. v3: And flush the tasklet before the engines, as the tasklet may re-attach an rb_node after our removal from the siblings. Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201123113717.20500-4-chris@chris-wilson.co.uk (cherry picked from commit 46eecfccb4c2b0f258adbafb2e53ca3b822cd663) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_lrc.c | 60 +++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9bb16bdf93cf..0952bf157234 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -182,6 +182,7 @@ struct virtual_engine { struct intel_engine_cs base; struct intel_context context; + struct rcu_work rcu; /* * We allow only a single request through the virtual engine at a time @@ -5425,33 +5426,57 @@ static struct list_head *virtual_queue(struct virtual_engine *ve) return &ve->base.execlists.default_priolist.requests[0]; } -static void virtual_context_destroy(struct kref *kref) +static void rcu_virtual_context_destroy(struct work_struct *wrk) { struct virtual_engine *ve = - container_of(kref, typeof(*ve), context.ref); + container_of(wrk, typeof(*ve), rcu.work); unsigned int n; - GEM_BUG_ON(!list_empty(virtual_queue(ve))); - GEM_BUG_ON(ve->request); GEM_BUG_ON(ve->context.inflight); + /* Preempt-to-busy may leave a stale request behind. */ + if (unlikely(ve->request)) { + struct i915_request *old; + + spin_lock_irq(&ve->base.active.lock); + + old = fetch_and_zero(&ve->request); + if (old) { + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } + + spin_unlock_irq(&ve->base.active.lock); + } + + /* + * Flush the tasklet in case it is still running on another core. + * + * This needs to be done before we remove ourselves from the siblings' + * rbtrees as in the case it is running in parallel, it may reinsert + * the rb_node into a sibling. + */ + tasklet_kill(&ve->base.execlists.tasklet); + + /* Decouple ourselves from the siblings, no more access allowed. */ for (n = 0; n < ve->num_siblings; n++) { struct intel_engine_cs *sibling = ve->siblings[n]; struct rb_node *node = &ve->nodes[sibling->id].rb; - unsigned long flags; if (RB_EMPTY_NODE(node)) continue; - spin_lock_irqsave(&sibling->active.lock, flags); + spin_lock_irq(&sibling->active.lock); /* Detachment is lazily performed in the execlists tasklet */ if (!RB_EMPTY_NODE(node)) rb_erase_cached(node, &sibling->execlists.virtual); - spin_unlock_irqrestore(&sibling->active.lock, flags); + spin_unlock_irq(&sibling->active.lock); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(!list_empty(virtual_queue(ve))); if (ve->context.state) __execlists_context_fini(&ve->context); @@ -5464,6 +5489,27 @@ static void virtual_context_destroy(struct kref *kref) kfree(ve); } +static void virtual_context_destroy(struct kref *kref) +{ + struct virtual_engine *ve = + container_of(kref, typeof(*ve), context.ref); + + GEM_BUG_ON(!list_empty(&ve->context.signals)); + + /* + * When destroying the virtual engine, we have to be aware that + * it may still be in use from an hardirq/softirq context causing + * the resubmission of a completed request (background completion + * due to preempt-to-busy). Before we can free the engine, we need + * to flush the submission code and tasklets that are still potentially + * accessing the engine. Flushing the tasklets requires process context, + * and since we can guard the resubmit onto the engine with an RCU read + * lock, we can delegate the free of the engine to an RCU worker. + */ + INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); + queue_rcu_work(system_wq, &ve->rcu); +} + static void virtual_engine_initial_hint(struct virtual_engine *ve) { int swp; From d661155bfca329851a27bb5120fab027db43bd23 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 17 Nov 2020 15:25:48 -0500 Subject: [PATCH 24/28] drm/amd/display: Avoid HDCP initialization in devices without output The HDCP feature requires at least one connector attached to the device; however, some GPUs do not have a physical output, making the HDCP initialization irrelevant. This patch disables HDCP initialization when the graphic card does not have output. Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0e7118000919..9b6809f309f4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1041,7 +1041,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) amdgpu_dm_init_color_mod(); #ifdef CONFIG_DRM_AMD_DC_HDCP - if (adev->asic_type >= CHIP_RAVEN) { + if (adev->dm.dc->caps.max_links > 0 && adev->asic_type >= CHIP_RAVEN) { adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc); if (!adev->dm.hdcp_workqueue) From 60734bd54679d7998a24a257b0403f7644005572 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Mon, 23 Nov 2020 10:28:46 +0800 Subject: [PATCH 25/28] drm/amdgpu: update golden setting for sienna_cichlid Update golden setting for sienna_cichlid. Signed-off-by: Likun Gao Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.9.x --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 3579565e0eab..55f4b8c3b933 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3105,6 +3105,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), From 030c5b52d4c1225030891d25abfe376b6e239712 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Fri, 23 Oct 2020 13:41:12 +0800 Subject: [PATCH 26/28] drm/amdgpu: Fix size calculation when init onchip memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Size is page count here. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1372 Reviewed-by: Christian König Signed-off-by: xinhui pan Signed-off-by: Alex Deucher (cherry picked from commit d836917da7e5ca9b33ef4d499972f1feeb519e00) [airlied: from drm-next] Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8039d2399584..a0248d78190f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -69,10 +69,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev, static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, - uint64_t size) + uint64_t size_in_page) { return ttm_range_man_init(&adev->mman.bdev, type, - false, size >> PAGE_SHIFT); + false, size_in_page); } /** From 10e26e749fd0ba78a913548e2efeca1a157772da Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 22 Sep 2020 16:46:55 +0200 Subject: [PATCH 27/28] drm/ast: Reload gamma LUT after changing primary plane's color format The gamma LUT has to be reloaded after changing the primary plane's color format. This used to be done implicitly by the CRTC atomic_enable() helper after updating the primary plane. With the recent reordering of the steps, the primary plane's setup was moved last and invalidated the gamma LUT. Fix this by setting the LUT from within atomic_flush(). Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Fixes: 2f0ddd89fe32 ("drm/ast: Enable CRTC before planes") Cc: Thomas Zimmermann Cc: Daniel Vetter Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20200922144655.23624-1-tzimmermann@suse.de (cherry-picked from 8e3784dfef8a03143b13e7e4011f276a954f1bc6) --- drivers/gpu/drm/ast/ast_mode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 834a156e3a75..0a1e1cf57e19 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -742,7 +742,6 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode) case DRM_MODE_DPMS_SUSPEND: if (ast->tx_chip_type == AST_TX_DP501) ast_set_dp501_video_output(crtc->dev, 1); - ast_crtc_load_lut(ast, crtc); break; case DRM_MODE_DPMS_OFF: if (ast->tx_chip_type == AST_TX_DP501) @@ -777,6 +776,21 @@ static int ast_crtc_helper_atomic_check(struct drm_crtc *crtc, return 0; } +static void +ast_crtc_helper_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) +{ + struct ast_private *ast = to_ast_private(crtc->dev); + struct ast_crtc_state *ast_crtc_state = to_ast_crtc_state(crtc->state); + struct ast_crtc_state *old_ast_crtc_state = to_ast_crtc_state(old_crtc_state); + + /* + * The gamma LUT has to be reloaded after changing the primary + * plane's color format. + */ + if (old_ast_crtc_state->format != ast_crtc_state->format) + ast_crtc_load_lut(ast, crtc); +} + static void ast_crtc_helper_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) @@ -830,6 +844,7 @@ ast_crtc_helper_atomic_disable(struct drm_crtc *crtc, static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = { .atomic_check = ast_crtc_helper_atomic_check, + .atomic_flush = ast_crtc_helper_atomic_flush, .atomic_enable = ast_crtc_helper_atomic_enable, .atomic_disable = ast_crtc_helper_atomic_disable, }; From 2be65641642ef423f82162c3a5f28c754d1637d2 Mon Sep 17 00:00:00 2001 From: Matti Hamalainen Date: Fri, 20 Nov 2020 17:23:38 +0200 Subject: [PATCH 28/28] drm/nouveau: fix relocations applying logic and a double-free Commit 03e0d26fcf79 ("drm/nouveau: slowpath for pushbuf ioctl") included a logic-bug which results in the relocations not actually getting applied at all as the call to nouveau_gem_pushbuf_reloc_apply() is never reached. This causes a regression with graphical corruption, triggered when relocations need to be done (for example after a suspend/resume cycle.) Fix by setting *apply_relocs value only if there were more than 0 relocations. Additionally, the never reached code had a leftover u_free() call, which, after fixing the logic, now got called and resulted in a double-free. Fix by removing one u_free(), moving the other and adding check for errors. Cc: Daniel Vetter Cc: Ben Skeggs Cc: nouveau@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Matti Hamalainen Fixes: 03e0d26fcf79 ("drm/nouveau: slowpath for pushbuf ioctl") References: https://gitlab.freedesktop.org/drm/nouveau/-/issues/11 Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201120152338.1203257-1-ccr@tnsp.org --- drivers/gpu/drm/nouveau/nouveau_gem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 549bc67feabb..c2051380d18c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -558,8 +558,10 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, NV_PRINTK(err, cli, "validating bo list\n"); validate_fini(op, chan, NULL, NULL); return ret; + } else if (ret > 0) { + *apply_relocs = true; } - *apply_relocs = ret; + return 0; } @@ -662,7 +664,6 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, nouveau_bo_wr32(nvbo, r->reloc_bo_offset >> 2, data); } - u_free(reloc); return ret; } @@ -872,9 +873,10 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, break; } } - u_free(reloc); } out_prevalid: + if (!IS_ERR(reloc)) + u_free(reloc); u_free(bo); u_free(push);