From 7b30255698edc91f9235faf586d21625ca7bbbac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Nov 2017 13:29:58 +0000 Subject: [PATCH 01/87] drm/i915/gvt: Cleanup unwanted public symbols drivers/gpu/drm/i915/gvt/execlist.c:531:6: warning: symbol 'clean_execlist' was not declared. Should it be static? drivers/gpu/drm/i915/gvt/execlist.c:545:6: warning: symbol 'reset_execlist' was not declared. Should it be static? drivers/gpu/drm/i915/gvt/execlist.c:556:5: warning: symbol 'init_execlist' was not declared. Should it be static? drivers/gpu/drm/i915/gvt/scheduler.c:248:6: warning: symbol 'release_shadow_wa_ctx' was not declared. Should it be static? References: 06bb372f9ace ("drm/i915/gvt: Introduce intel_vgpu_reset_submission") Signed-off-by: Chris Wilson Cc: Zhi Wang Cc: Zhenyu Wang Cc: intel-gvt-dev@lists.freedesktop.org Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 6 +++--- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index c9fa0fb488d3..fa4929584744 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -528,7 +528,7 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -void clean_execlist(struct intel_vgpu *vgpu) +static void clean_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; struct intel_engine_cs *engine; @@ -542,7 +542,7 @@ void clean_execlist(struct intel_vgpu *vgpu) } } -void reset_execlist(struct intel_vgpu *vgpu, +static void reset_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -553,7 +553,7 @@ void reset_execlist(struct intel_vgpu *vgpu, init_vgpu_execlist(vgpu, engine->id); } -int init_execlist(struct intel_vgpu *vgpu) +static int init_execlist(struct intel_vgpu *vgpu) { reset_execlist(vgpu, ALL_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a742b364c2c3..13ccc00f0d40 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -245,7 +245,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) return 0; } -void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { if (!wa_ctx->indirect_ctx.obj) return; From ea26c96d59b27e878fe61e8ef0fed840d2281a2f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Dec 2017 11:23:44 +0800 Subject: [PATCH 02/87] drm/i915/gvt: Fix out-of-bounds buffer write into opregion->signature[] sparse spots drivers/gpu/drm/i915/gvt/opregion.c:234 alloc_and_init_virt_opregion() error: memcpy() 'header->signature' too small (16 vs 17) as gvt is indeed trying to memcpy a string longer than the signature[]. Fixes: b2d6ef70614e ("drm/i915/gvt: Let each vgpu has separate opregion memory") Signed-off-by: Chris Wilson Cc: Xiong Zhang Cc: Zhenyu Wang Cc: Zhi Wang Cc: intel-gvt-dev@lists.freedesktop.org Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/opregion.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 80720e59723a..bf8e5c49a64a 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -218,6 +218,7 @@ static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) u8 *buf; struct opregion_header *header; struct vbt v; + const char opregion_signature[16] = OPREGION_SIGNATURE; gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | @@ -231,8 +232,8 @@ static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) /* emulated opregion with VBT mailbox only */ buf = (u8 *)vgpu_opregion(vgpu)->va; header = (struct opregion_header *)buf; - memcpy(header->signature, OPREGION_SIGNATURE, - sizeof(OPREGION_SIGNATURE)); + memcpy(header->signature, opregion_signature, + sizeof(opregion_signature)); header->size = 0x8; header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; From 4dff110b15aea2f7653957a70921a7be1f45d59b Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Mon, 20 Nov 2017 15:31:15 +0800 Subject: [PATCH 03/87] drm/i915/gvt: Alloc and Init guest opregion at vgpu creation Currently guest opregion is allocated and initialised when guest write opregion base register. This is too late for kvmgt, so move it to vgpu_create time. Signed-off-by: Xiong Zhang Tested-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 3 +- drivers/gpu/drm/i915/gvt/gvt.h | 4 +- drivers/gpu/drm/i915/gvt/opregion.c | 98 +++++++++++++--------------- drivers/gpu/drm/i915/gvt/vgpu.c | 8 ++- 4 files changed, 57 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4ce2e6bd0680..97bfc00d2a82 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -335,7 +335,8 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, case INTEL_GVT_PCI_OPREGION: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; - ret = intel_vgpu_init_opregion(vgpu, *(u32 *)p_data); + ret = intel_vgpu_opregion_base_write_handler(vgpu, + *(u32 *)p_data); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 393066726993..07d3ba0f7277 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -123,6 +123,7 @@ struct intel_vgpu_irq { }; struct intel_vgpu_opregion { + bool mapped; void *va; u32 gfn[INTEL_GVT_OPREGION_PAGES]; }; @@ -505,7 +506,8 @@ static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) } void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu); -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa); +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu); +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa); int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); void populate_pvinfo_page(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index bf8e5c49a64a..36172f33bd51 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -213,7 +213,15 @@ static void virt_vbt_generation(struct vbt *v) v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS; } -static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) +/** + * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion + * @vgpu: a vGPU + * @gpa: guest physical address of opregion + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) { u8 *buf; struct opregion_header *header; @@ -251,25 +259,6 @@ static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) return 0; } -static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) -{ - int i, ret; - - if (WARN((vgpu_opregion(vgpu)->va), - "vgpu%d: opregion has been initialized already.\n", - vgpu->id)) - return -EINVAL; - - ret = alloc_and_init_virt_opregion(vgpu); - if (ret < 0) - return ret; - - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) - vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; - - return 0; -} - static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) { u64 mfn; @@ -291,9 +280,40 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) return ret; } } + + vgpu_opregion(vgpu)->mapped = map; + return 0; } +/** + * intel_vgpu_opregion_base_write_handler - Opregion base register write handler + * + * @vgpu: a vGPU + * @gpa: guest physical address of opregion + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa) +{ + int i, ret; + + /** + * Wins guest on Xengt will write this register twice: xen hvmloader and + * windows graphic driver. + */ + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + + for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) + vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; + + ret = map_vgpu_opregion(vgpu, true); + + return ret; +} + /** * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion * @vgpu: a vGPU @@ -306,43 +326,15 @@ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) if (!vgpu_opregion(vgpu)->va) return; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { + if (vgpu_opregion(vgpu)->mapped) map_vgpu_opregion(vgpu, false); - free_pages((unsigned long)vgpu_opregion(vgpu)->va, - get_order(INTEL_GVT_OPREGION_SIZE)); - vgpu_opregion(vgpu)->va = NULL; - } + free_pages((unsigned long)vgpu_opregion(vgpu)->va, + get_order(INTEL_GVT_OPREGION_SIZE)); + + vgpu_opregion(vgpu)->va = NULL; } -/** - * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion - * @vgpu: a vGPU - * @gpa: guest physical address of opregion - * - * Returns: - * Zero on success, negative error code if failed. - */ -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) -{ - int ret; - - gvt_dbg_core("vgpu%d: init vgpu opregion\n", vgpu->id); - - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { - gvt_dbg_core("emulate opregion from kernel\n"); - - ret = init_vgpu_opregion(vgpu, gpa); - if (ret) - return ret; - - ret = map_vgpu_opregion(vgpu, true); - if (ret) - return ret; - } - - return 0; -} #define GVT_OPREGION_FUNC(scic) \ ({ \ diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c6b82d1ba7de..2896aafc9520 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -370,10 +370,14 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu, param->resolution); + ret = intel_vgpu_init_opregion(vgpu); if (ret) goto out_clean_gtt; + ret = intel_vgpu_init_display(vgpu, param->resolution); + if (ret) + goto out_clean_opregion; + ret = intel_vgpu_setup_submission(vgpu); if (ret) goto out_clean_display; @@ -396,6 +400,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, intel_vgpu_clean_submission(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); +out_clean_opregion: + intel_vgpu_clean_opregion(vgpu); out_clean_gtt: intel_vgpu_clean_gtt(vgpu); out_detach_hypervisor_vgpu: From b851adeac0858c7d257b32eee2142b1519d45ccf Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Mon, 20 Nov 2017 15:31:16 +0800 Subject: [PATCH 04/87] drm/i915/gvt: Add opregion support Windows guest driver needs vbt in opregion, to configure the setting for display. Without opregion support, the display registers won't be set and this blocks display model to get the correct information of the guest display plane. This patch is to provide a virtual opregion for guest. The original author of this patch is Xiaoguang Chen. This patch is split from the "Dma-buf support for GVT-g" patch set, with being rebased to the latest gvt-staging branch. v3: - add checking region index during intel_vgpu_rw. (Xiong) v2: - refine intel_vgpu_reg_release_opregion. (Xiong) Here are the previous version comments: v18: - unmap vgpu's opregion when destroying vgpu. v16: - rebase to 4.14.0-rc6. Signed-off-by: Bing Niu Signed-off-by: Tina Zhang Tested-by: Xiong Zhang Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/hypercall.h | 1 + drivers/gpu/drm/i915/gvt/kvmgt.c | 108 ++++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/mpt.h | 15 ++++ drivers/gpu/drm/i915/gvt/opregion.c | 74 ++++++++++++++---- drivers/gpu/drm/i915/gvt/vgpu.c | 4 + 6 files changed, 184 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 07d3ba0f7277..f3999f251cd5 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -125,6 +125,7 @@ struct intel_vgpu_irq { struct intel_vgpu_opregion { bool mapped; void *va; + void *va_gopregion; u32 gfn[INTEL_GVT_OPREGION_PAGES]; }; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index df7f33abd393..32c345c3fa27 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -55,6 +55,7 @@ struct intel_gvt_mpt { unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); + int (*set_opregion)(void *vgpu); }; extern struct intel_gvt_mpt xengt_mpt; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 110f07e8bcfb..e0cda45ac6c2 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -53,11 +53,23 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define OPREGION_SIGNATURE "IntelGraphicsMem" + +struct vfio_region; +struct intel_vgpu_regops { + size_t (*rw)(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite); + void (*release)(struct intel_vgpu *vgpu, + struct vfio_region *region); +}; + struct vfio_region { u32 type; u32 subtype; size_t size; u32 flags; + const struct intel_vgpu_regops *ops; + void *data; }; struct kvmgt_pgfn { @@ -316,6 +328,87 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, } } +static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + void *base = vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos >= vgpu->vdev.region[i].size || iswrite) { + gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); + return -EINVAL; + } + count = min(count, (size_t)(vgpu->vdev.region[i].size - pos)); + memcpy(buf, base + pos, count); + + return count; +} + +static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ +} + +static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { + .rw = intel_vgpu_reg_rw_opregion, + .release = intel_vgpu_reg_release_opregion, +}; + +static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, + unsigned int type, unsigned int subtype, + const struct intel_vgpu_regops *ops, + size_t size, u32 flags, void *data) +{ + struct vfio_region *region; + + region = krealloc(vgpu->vdev.region, + (vgpu->vdev.num_regions + 1) * sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + vgpu->vdev.region = region; + vgpu->vdev.region[vgpu->vdev.num_regions].type = type; + vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype; + vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops; + vgpu->vdev.region[vgpu->vdev.num_regions].size = size; + vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags; + vgpu->vdev.region[vgpu->vdev.num_regions].data = data; + vgpu->vdev.num_regions++; + + return 0; +} + +static int kvmgt_set_opregion(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + void *base; + int ret; + + /* Each vgpu has its own opregion, although VFIO would create another + * one later. This one is used to expose opregion to VFIO. And the + * other one created by VFIO later, is used by guest actually. + */ + base = vgpu_opregion(vgpu)->va; + if (!base) + return -ENOMEM; + + if (memcmp(base, OPREGION_SIGNATURE, 16)) { + memunmap(base); + return -EINVAL; + } + + ret = intel_vgpu_register_reg(vgpu, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, + &intel_vgpu_regops_opregion, OPREGION_SIZE, + VFIO_REGION_INFO_FLAG_READ, base); + + return ret; +} + static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { struct intel_vgpu *vgpu = NULL; @@ -546,7 +639,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS) { + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -574,8 +667,14 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_BAR5_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: case VFIO_PCI_ROM_REGION_INDEX: + break; default: - gvt_vgpu_err("unsupported region: %u\n", index); + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) + return -EINVAL; + + index -= VFIO_PCI_NUM_REGIONS; + return vgpu->vdev.region[index].ops->rw(vgpu, buf, count, + ppos, is_write); } return ret == 0 ? count : ret; @@ -838,7 +937,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; - info.num_regions = VFIO_PCI_NUM_REGIONS; + info.num_regions = VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? @@ -959,6 +1059,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; if (info.argsz < sizeof(info) + caps.size) { info.argsz = sizeof(info) + caps.size; info.cap_offset = 0; @@ -1426,6 +1527,7 @@ struct intel_gvt_mpt kvmgt_mpt = { .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, + .set_opregion = kvmgt_set_opregion, }; EXPORT_SYMBOL_GPL(kvmgt_mpt); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index c436e20ea59e..c99e7964731c 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -294,4 +294,19 @@ static inline int intel_gvt_hypervisor_set_trap_area( return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map); } +/** + * intel_gvt_hypervisor_set_opregion - Set opregion for guest + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->set_opregion) + return 0; + + return intel_gvt_host.mpt->set_opregion(vgpu); +} + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 36172f33bd51..8420d1fc3ddb 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -297,19 +297,41 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) */ int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa) { - int i, ret; - /** - * Wins guest on Xengt will write this register twice: xen hvmloader and - * windows graphic driver. - */ - if (vgpu_opregion(vgpu)->mapped) - map_vgpu_opregion(vgpu, false); + int i, ret = 0; + unsigned long pfn; - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) - vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; + gvt_dbg_core("emulate opregion from kernel\n"); - ret = map_vgpu_opregion(vgpu, true); + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_KVM: + pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gpa >> PAGE_SHIFT); + vgpu_opregion(vgpu)->va_gopregion = memremap(pfn << PAGE_SHIFT, + INTEL_GVT_OPREGION_SIZE, + MEMREMAP_WB); + if (!vgpu_opregion(vgpu)->va_gopregion) { + gvt_vgpu_err("failed to map guest opregion\n"); + ret = -EFAULT; + } + vgpu_opregion(vgpu)->mapped = true; + break; + case INTEL_GVT_HYPERVISOR_XEN: + /** + * Wins guest on Xengt will write this register twice: xen + * hvmloader and windows graphic driver. + */ + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + + for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) + vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; + + ret = map_vgpu_opregion(vgpu, true); + break; + default: + ret = -EINVAL; + gvt_vgpu_err("not supported hypervisor\n"); + } return ret; } @@ -326,13 +348,20 @@ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) if (!vgpu_opregion(vgpu)->va) return; - if (vgpu_opregion(vgpu)->mapped) - map_vgpu_opregion(vgpu, false); - + if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + } else if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { + if (vgpu_opregion(vgpu)->mapped) { + memunmap(vgpu_opregion(vgpu)->va_gopregion); + vgpu_opregion(vgpu)->va_gopregion = NULL; + } + } free_pages((unsigned long)vgpu_opregion(vgpu)->va, - get_order(INTEL_GVT_OPREGION_SIZE)); + get_order(INTEL_GVT_OPREGION_SIZE)); vgpu_opregion(vgpu)->va = NULL; + } @@ -454,8 +483,21 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) u32 *scic, *parm; u32 func, subfunc; - scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC; - parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_XEN: + scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC; + parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; + break; + case INTEL_GVT_HYPERVISOR_KVM: + scic = vgpu_opregion(vgpu)->va_gopregion + + INTEL_GVT_OPREGION_SCIC; + parm = vgpu_opregion(vgpu)->va_gopregion + + INTEL_GVT_OPREGION_PARM; + break; + default: + gvt_vgpu_err("not supported hypervisor\n"); + return -EINVAL; + } if (!(swsci & SWSCI_SCI_SELECT)) { gvt_vgpu_err("requesting SMI service\n"); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 2896aafc9520..dcdd72260cc9 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -390,6 +390,10 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; + ret = intel_gvt_hypervisor_set_opregion(vgpu); + if (ret) + goto out_clean_sched_policy; + mutex_unlock(&gvt->lock); return vgpu; From 9f31d1063b434c2d54317461d78285b08538c01a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:32 +0800 Subject: [PATCH 05/87] drm/i915/gvt: Add framebuffer decoder support This patch is to introduce the framebuffer decoder which can decode guest OS's framebuffer information, including primary, cursor and sprite plane. v16: - rebase to 4.14.0-rc6. v14: - refine pixel format table. (Zhenyu) v9: - move drm format change to a separate patch. (Xiaoguang) v8: - fix a bug in decoding primary plane. (Tina) v7: - refine framebuffer decoder code. (Zhenyu) Signed-off-by: Tina Zhang Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/Makefile | 3 +- drivers/gpu/drm/i915/gvt/display.c | 2 +- drivers/gpu/drm/i915/gvt/display.h | 2 + drivers/gpu/drm/i915/gvt/fb_decoder.c | 507 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/fb_decoder.h | 169 +++++++++ drivers/gpu/drm/i915/gvt/gvt.h | 1 + 6 files changed, 682 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/gvt/fb_decoder.c create mode 100644 drivers/gpu/drm/i915/gvt/fb_decoder.h diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 7100240a4a81..54d70df96494 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -1,7 +1,8 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ - execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o + execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o \ + fb_decoder.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a659..fb7fdbacfe64 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -67,7 +67,7 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) return 1; } -static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index d73de22102e2..b46b86892d58 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -179,4 +179,6 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe); + #endif diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c new file mode 100644 index 000000000000..6c99c64d8e14 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -0,0 +1,507 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian + * + * Contributors: + * Bing Niu + * Xu Han + * Ping Gao + * Xiaoguang Chen + * Yang Liu + * Tina Zhang + * + */ + +#include +#include "i915_drv.h" +#include "gvt.h" + +#define PRIMARY_FORMAT_NUM 16 +struct pixel_format { + int drm_format; /* Pixel format in DRM definition */ + int bpp; /* Bits per pixel, 0 indicates invalid */ + char *desc; /* The description */ +}; + +static struct pixel_format bdw_pixel_formats[] = { + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static struct pixel_format skl_pixel_formats[] = { + {DRM_FORMAT_YUYV, 16, "16-bit packed YUYV (8:8:8:8 MSB-V:Y2:U:Y1)"}, + {DRM_FORMAT_UYVY, 16, "16-bit packed UYVY (8:8:8:8 MSB-Y2:V:Y1:U)"}, + {DRM_FORMAT_YVYU, 16, "16-bit packed YVYU (8:8:8:8 MSB-U:Y2:V:Y1)"}, + {DRM_FORMAT_VYUY, 16, "16-bit packed VYUY (8:8:8:8 MSB-Y2:U:Y1:V)"}, + + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_ABGR8888, 32, "32-bit RGBA (8:8:8:8 MSB-A:B:G:R)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + {DRM_FORMAT_ARGB8888, 32, "32-bit BGRA (8:8:8:8 MSB-A:R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static int bdw_format_to_drm(int format) +{ + int bdw_pixel_formats_index = 6; + + switch (format) { + case DISPPLANE_8BPP: + bdw_pixel_formats_index = 0; + break; + case DISPPLANE_BGRX565: + bdw_pixel_formats_index = 1; + break; + case DISPPLANE_BGRX888: + bdw_pixel_formats_index = 2; + break; + case DISPPLANE_RGBX101010: + bdw_pixel_formats_index = 3; + break; + case DISPPLANE_BGRX101010: + bdw_pixel_formats_index = 4; + break; + case DISPPLANE_RGBX888: + bdw_pixel_formats_index = 5; + break; + + default: + break; + } + + return bdw_pixel_formats_index; +} + +static int skl_format_to_drm(int format, bool rgb_order, bool alpha, + int yuv_order) +{ + int skl_pixel_formats_index = 12; + + switch (format) { + case PLANE_CTL_FORMAT_INDEXED: + skl_pixel_formats_index = 4; + break; + case PLANE_CTL_FORMAT_RGB_565: + skl_pixel_formats_index = 5; + break; + case PLANE_CTL_FORMAT_XRGB_8888: + if (rgb_order) + skl_pixel_formats_index = alpha ? 6 : 7; + else + skl_pixel_formats_index = alpha ? 8 : 9; + break; + case PLANE_CTL_FORMAT_XRGB_2101010: + skl_pixel_formats_index = rgb_order ? 10 : 11; + break; + case PLANE_CTL_FORMAT_YUV422: + skl_pixel_formats_index = yuv_order >> 16; + if (skl_pixel_formats_index > 3) + return -EINVAL; + break; + + default: + break; + } + + return skl_pixel_formats_index; +} + +static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, + u32 tiled, int stride_mask, int bpp) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + u32 stride_reg = vgpu_vreg(vgpu, DSPSTRIDE(pipe)) & stride_mask; + u32 stride = stride_reg; + + if (IS_SKYLAKE(dev_priv)) { + switch (tiled) { + case PLANE_CTL_TILED_LINEAR: + stride = stride_reg * 64; + break; + case PLANE_CTL_TILED_X: + stride = stride_reg * 512; + break; + case PLANE_CTL_TILED_Y: + stride = stride_reg * 128; + break; + case PLANE_CTL_TILED_YF: + if (bpp == 8) + stride = stride_reg * 64; + else if (bpp == 16 || bpp == 32 || bpp == 64) + stride = stride_reg * 128; + else + gvt_dbg_core("skl: unsupported bpp:%d\n", bpp); + break; + default: + gvt_dbg_core("skl: unsupported tile format:%x\n", + tiled); + } + } + + return stride; +} + +static int get_active_pipe(struct intel_vgpu *vgpu) +{ + int i; + + for (i = 0; i < I915_MAX_PIPES; i++) + if (pipe_is_enabled(vgpu, i)) + break; + + return i; +} + +/** + * intel_vgpu_decode_primary_plane - Decode primary plane + * @vgpu: input vgpu + * @plane: primary plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. + */ +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane) +{ + u32 val, fmt; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg(vgpu, DSPCNTR(pipe)); + plane->enabled = !!(val & DISPLAY_PLANE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + if (IS_SKYLAKE(dev_priv)) { + plane->tiled = (val & PLANE_CTL_TILED_MASK) >> + _PLANE_CTL_TILED_SHIFT; + fmt = skl_format_to_drm( + val & PLANE_CTL_FORMAT_MASK, + val & PLANE_CTL_ORDER_RGBX, + val & PLANE_CTL_ALPHA_MASK, + val & PLANE_CTL_YUV422_ORDER_MASK); + plane->bpp = skl_pixel_formats[fmt].bpp; + plane->drm_format = skl_pixel_formats[fmt].drm_format; + } else { + plane->tiled = !!(val & DISPPLANE_TILED); + fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK); + plane->bpp = bdw_pixel_formats[fmt].bpp; + plane->drm_format = bdw_pixel_formats[fmt].drm_format; + } + + if (!plane->bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + + plane->hw_format = fmt; + + plane->base = vgpu_vreg(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), + (IS_SKYLAKE(dev_priv)) ? (_PRI_PLANE_STRIDE_MASK >> 6) : + _PRI_PLANE_STRIDE_MASK, plane->bpp); + + plane->width = (vgpu_vreg(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> + _PIPE_H_SRCSZ_SHIFT; + plane->width += 1; + plane->height = (vgpu_vreg(vgpu, PIPESRC(pipe)) & + _PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + + val = vgpu_vreg(vgpu, DSPTILEOFF(pipe)); + plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >> + _PRI_PLANE_X_OFF_SHIFT; + plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >> + _PRI_PLANE_Y_OFF_SHIFT; + + return 0; +} + +#define CURSOR_FORMAT_NUM (1 << 6) +struct cursor_mode_format { + int drm_format; /* Pixel format in DRM definition */ + u8 bpp; /* Bits per pixel; 0 indicates invalid */ + u32 width; /* In pixel */ + u32 height; /* In lines */ + char *desc; /* The description */ +}; + +static struct cursor_mode_format cursor_pixel_formats[] = { + {DRM_FORMAT_ARGB8888, 32, 128, 128, "128x128 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 256, 256, "256x256 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, 0, 0, NULL}, +}; + +static int cursor_mode_to_drm(int mode) +{ + int cursor_pixel_formats_index = 4; + + switch (mode) { + case CURSOR_MODE_128_ARGB_AX: + cursor_pixel_formats_index = 0; + break; + case CURSOR_MODE_256_ARGB_AX: + cursor_pixel_formats_index = 1; + break; + case CURSOR_MODE_64_ARGB_AX: + cursor_pixel_formats_index = 2; + break; + case CURSOR_MODE_64_32B_AX: + cursor_pixel_formats_index = 3; + break; + + default: + break; + } + + return cursor_pixel_formats_index; +} + +/** + * intel_vgpu_decode_cursor_plane - Decode sprite plane + * @vgpu: input vgpu + * @plane: cursor plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. + */ +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane) +{ + u32 val, mode, index; + u32 alpha_plane, alpha_force; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg(vgpu, CURCNTR(pipe)); + mode = val & CURSOR_MODE; + plane->enabled = (mode != CURSOR_MODE_DISABLE); + if (!plane->enabled) + return -ENODEV; + + index = cursor_mode_to_drm(mode); + + if (!cursor_pixel_formats[index].bpp) { + gvt_vgpu_err("Non-supported cursor mode (0x%x)\n", mode); + return -EINVAL; + } + plane->mode = mode; + plane->bpp = cursor_pixel_formats[index].bpp; + plane->drm_format = cursor_pixel_formats[index].drm_format; + plane->width = cursor_pixel_formats[index].width; + plane->height = cursor_pixel_formats[index].height; + + alpha_plane = (val & _CURSOR_ALPHA_PLANE_MASK) >> + _CURSOR_ALPHA_PLANE_SHIFT; + alpha_force = (val & _CURSOR_ALPHA_FORCE_MASK) >> + _CURSOR_ALPHA_FORCE_SHIFT; + if (alpha_plane || alpha_force) + gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n", + alpha_plane, alpha_force); + + plane->base = vgpu_vreg(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + val = vgpu_vreg(vgpu, CURPOS(pipe)); + plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT; + plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT; + plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; + plane->y_sign = (val & _CURSOR_SIGN_Y_MASK) >> _CURSOR_SIGN_Y_SHIFT; + + return 0; +} + +#define SPRITE_FORMAT_NUM (1 << 3) + +static struct pixel_format sprite_pixel_formats[SPRITE_FORMAT_NUM] = { + [0x0] = {DRM_FORMAT_YUV422, 16, "YUV 16-bit 4:2:2 packed"}, + [0x1] = {DRM_FORMAT_XRGB2101010, 32, "RGB 32-bit 2:10:10:10"}, + [0x2] = {DRM_FORMAT_XRGB8888, 32, "RGB 32-bit 8:8:8:8"}, + [0x4] = {DRM_FORMAT_AYUV, 32, + "YUV 32-bit 4:4:4 packed (8:8:8:8 MSB-X:Y:U:V)"}, +}; + +/** + * intel_vgpu_decode_sprite_plane - Decode sprite plane + * @vgpu: input vgpu + * @plane: sprite plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. + */ +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane) +{ + u32 val, fmt; + u32 color_order, yuv_order; + int drm_format; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg(vgpu, SPRCTL(pipe)); + plane->enabled = !!(val & SPRITE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + plane->tiled = !!(val & SPRITE_TILED); + color_order = !!(val & SPRITE_RGB_ORDER_RGBX); + yuv_order = (val & SPRITE_YUV_BYTE_ORDER_MASK) >> + _SPRITE_YUV_ORDER_SHIFT; + + fmt = (val & SPRITE_PIXFORMAT_MASK) >> _SPRITE_FMT_SHIFT; + if (!sprite_pixel_formats[fmt].bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + plane->hw_format = fmt; + plane->bpp = sprite_pixel_formats[fmt].bpp; + drm_format = sprite_pixel_formats[fmt].drm_format; + + /* Order of RGB values in an RGBxxx buffer may be ordered RGB or + * BGR depending on the state of the color_order field + */ + if (!color_order) { + if (drm_format == DRM_FORMAT_XRGB2101010) + drm_format = DRM_FORMAT_XBGR2101010; + else if (drm_format == DRM_FORMAT_XRGB8888) + drm_format = DRM_FORMAT_XBGR8888; + } + + if (drm_format == DRM_FORMAT_YUV422) { + switch (yuv_order) { + case 0: + drm_format = DRM_FORMAT_YUYV; + break; + case 1: + drm_format = DRM_FORMAT_UYVY; + break; + case 2: + drm_format = DRM_FORMAT_YVYU; + break; + case 3: + drm_format = DRM_FORMAT_VYUY; + break; + default: + /* yuv_order has only 2 bits */ + break; + } + } + + plane->drm_format = drm_format; + + plane->base = vgpu_vreg(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = vgpu_vreg(vgpu, SPRSTRIDE(pipe)) & + _SPRITE_STRIDE_MASK; + + val = vgpu_vreg(vgpu, SPRSIZE(pipe)); + plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >> + _SPRITE_SIZE_HEIGHT_SHIFT; + plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >> + _SPRITE_SIZE_WIDTH_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + plane->width += 1; /* raw width is one minus the real value */ + + val = vgpu_vreg(vgpu, SPRPOS(pipe)); + plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT; + plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT; + + val = vgpu_vreg(vgpu, SPROFFSET(pipe)); + plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >> + _SPRITE_OFFSET_START_X_SHIFT; + plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >> + _SPRITE_OFFSET_START_Y_SHIFT; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h new file mode 100644 index 000000000000..cb055f3c81a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h @@ -0,0 +1,169 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian + * + * Contributors: + * Bing Niu + * Xu Han + * Ping Gao + * Xiaoguang Chen + * Yang Liu + * Tina Zhang + * + */ + +#ifndef _GVT_FB_DECODER_H_ +#define _GVT_FB_DECODER_H_ + +#define _PLANE_CTL_FORMAT_SHIFT 24 +#define _PLANE_CTL_TILED_SHIFT 10 +#define _PIPE_V_SRCSZ_SHIFT 0 +#define _PIPE_V_SRCSZ_MASK (0xfff << _PIPE_V_SRCSZ_SHIFT) +#define _PIPE_H_SRCSZ_SHIFT 16 +#define _PIPE_H_SRCSZ_MASK (0x1fff << _PIPE_H_SRCSZ_SHIFT) + +#define _PRI_PLANE_FMT_SHIFT 26 +#define _PRI_PLANE_STRIDE_MASK (0x3ff << 6) +#define _PRI_PLANE_X_OFF_SHIFT 0 +#define _PRI_PLANE_X_OFF_MASK (0x1fff << _PRI_PLANE_X_OFF_SHIFT) +#define _PRI_PLANE_Y_OFF_SHIFT 16 +#define _PRI_PLANE_Y_OFF_MASK (0xfff << _PRI_PLANE_Y_OFF_SHIFT) + +#define _CURSOR_MODE 0x3f +#define _CURSOR_ALPHA_FORCE_SHIFT 8 +#define _CURSOR_ALPHA_FORCE_MASK (0x3 << _CURSOR_ALPHA_FORCE_SHIFT) +#define _CURSOR_ALPHA_PLANE_SHIFT 10 +#define _CURSOR_ALPHA_PLANE_MASK (0x3 << _CURSOR_ALPHA_PLANE_SHIFT) +#define _CURSOR_POS_X_SHIFT 0 +#define _CURSOR_POS_X_MASK (0x1fff << _CURSOR_POS_X_SHIFT) +#define _CURSOR_SIGN_X_SHIFT 15 +#define _CURSOR_SIGN_X_MASK (1 << _CURSOR_SIGN_X_SHIFT) +#define _CURSOR_POS_Y_SHIFT 16 +#define _CURSOR_POS_Y_MASK (0xfff << _CURSOR_POS_Y_SHIFT) +#define _CURSOR_SIGN_Y_SHIFT 31 +#define _CURSOR_SIGN_Y_MASK (1 << _CURSOR_SIGN_Y_SHIFT) + +#define _SPRITE_FMT_SHIFT 25 +#define _SPRITE_COLOR_ORDER_SHIFT 20 +#define _SPRITE_YUV_ORDER_SHIFT 16 +#define _SPRITE_STRIDE_SHIFT 6 +#define _SPRITE_STRIDE_MASK (0x1ff << _SPRITE_STRIDE_SHIFT) +#define _SPRITE_SIZE_WIDTH_SHIFT 0 +#define _SPRITE_SIZE_HEIGHT_SHIFT 16 +#define _SPRITE_SIZE_WIDTH_MASK (0x1fff << _SPRITE_SIZE_WIDTH_SHIFT) +#define _SPRITE_SIZE_HEIGHT_MASK (0xfff << _SPRITE_SIZE_HEIGHT_SHIFT) +#define _SPRITE_POS_X_SHIFT 0 +#define _SPRITE_POS_Y_SHIFT 16 +#define _SPRITE_POS_X_MASK (0x1fff << _SPRITE_POS_X_SHIFT) +#define _SPRITE_POS_Y_MASK (0xfff << _SPRITE_POS_Y_SHIFT) +#define _SPRITE_OFFSET_START_X_SHIFT 0 +#define _SPRITE_OFFSET_START_Y_SHIFT 16 +#define _SPRITE_OFFSET_START_X_MASK (0x1fff << _SPRITE_OFFSET_START_X_SHIFT) +#define _SPRITE_OFFSET_START_Y_MASK (0xfff << _SPRITE_OFFSET_START_Y_SHIFT) + +enum GVT_FB_EVENT { + FB_MODE_SET_START = 1, + FB_MODE_SET_END, + FB_DISPLAY_FLIP, +}; + +enum DDI_PORT { + DDI_PORT_NONE = 0, + DDI_PORT_B = 1, + DDI_PORT_C = 2, + DDI_PORT_D = 3, + DDI_PORT_E = 4 +}; + +struct intel_gvt; + +/* color space conversion and gamma correction are not included */ +struct intel_vgpu_primary_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the PRI_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* framebuffer base in graphics memory */ + u64 base_gpa; + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_sprite_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the SPR_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* sprite base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_cursor_plane_format { + u8 enabled; + u8 mode; /* cursor mode select */ + u8 bpp; /* bits per pixel */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* cursor base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u8 x_sign; /* X Position Sign */ + u8 y_sign; /* Y Position Sign */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 x_hot; /* in pixels */ + u32 y_hot; /* in pixels */ +}; + +struct intel_vgpu_pipe_format { + struct intel_vgpu_primary_plane_format primary; + struct intel_vgpu_sprite_plane_format sprite; + struct intel_vgpu_cursor_plane_format cursor; + enum DDI_PORT ddi_port; /* the DDI port that pipe is connected to */ +}; + +struct intel_vgpu_fb_format { + struct intel_vgpu_pipe_format pipes[I915_MAX_PIPES]; +}; + +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane); +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane); +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index f3999f251cd5..357d458cded0 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -46,6 +46,7 @@ #include "sched_policy.h" #include "render.h" #include "cmd_parser.h" +#include "fb_decoder.h" #define GVT_MAX_VGPU 8 From e20eaa2382e7888a4e06ccb015c476a6fb1fda0c Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:35 +0800 Subject: [PATCH 06/87] vfio: ABI for mdev display dma-buf operation Add VFIO_DEVICE_QUERY_GFX_PLANE ioctl command to let user query and get a plane and its information. So far, two types of buffers are supported: buffers based on dma-buf and buffers based on region. This ioctl can be invoked with: 1) Either DMABUF or REGION flag. Vendor driver returns a plane_info successfully only when the specific kind of buffer is supported. 2) Flag PROBE. And at the same time either DMABUF or REGION must be set, so that vendor driver returns success only when the specific kind of buffer is supported. Add VFIO_DEVICE_GET_GFX_DMABUF ioctl command to let user get a specific dma-buf fd of an exposed MDEV buffer provided by dmabuf_id which was returned in VFIO_DEVICE_QUERY_GFX_PLANE ioctl command. The life cycle of an exposed MDEV buffer is handled by userspace and tracked by kernel space. The returned dmabuf_id in struct vfio_device_ query_gfx_plane can be a new id of a new exposed buffer or an old id of a re-exported buffer. Host user can check the value of dmabuf_id to see if it needs to create new resources according to the new exposed buffer or just re-use the existing resource related to the old buffer. v18: - update comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) v17: - modify VFIO_DEVICE_GET_GFX_DMABUF interface. (Alex) v16: - add x_hot and y_hot fields. (Gerd) - add comments for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) - rebase to 4.14.0-rc6. v15: - add a ioctl to get a dmabuf for a given dmabuf id. (Gerd) v14: - add PROBE, DMABUF and REGION flags. (Alex) v12: - add drm_format_mod back. (Gerd and Zhenyu) - add region_index. (Gerd) v11: - rename plane_type to drm_plane_type. (Gerd) - move fields of vfio_device_query_gfx_plane to vfio_device_gfx_plane_info. (Gerd) - remove drm_format_mod, start fields. (Daniel) - remove plane_id. v10: - refine the ABI API VFIO_DEVICE_QUERY_GFX_PLANE. (Alex) (Gerd) v3: - add a field gvt_plane_info in the drm_i915_gem_obj structure to save the decoded plane information to avoid look up while need the plane info. (Gerd) Signed-off-by: Tina Zhang Reviewed-by: Gerd Hoffmann Reviewed-by: Kirti Wankhede Acked-by: Alex Williamson Cc: Daniel Vetter Signed-off-by: Zhenyu Wang --- include/uapi/linux/vfio.h | 62 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index ae461050661a..5c1cca2ba04d 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -502,6 +502,68 @@ struct vfio_pci_hot_reset { #define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) +/** + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_device_query_gfx_plane) + * + * Set the drm_plane_type and flags, then retrieve the gfx plane info. + * + * flags supported: + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set + * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no + * support for dma-buf. + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set + * to ask if the mdev supports region. 0 on support, -EINVAL on no + * support for region. + * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set + * with each call to query the plane info. + * - Others are invalid and return -EINVAL. + * + * Note: + * 1. Plane could be disabled by guest. In that case, success will be + * returned with zero-initialized drm_format, size, width and height + * fields. + * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available + * + * Return: 0 on success, -errno on other failure. + */ +struct vfio_device_gfx_plane_info { + __u32 argsz; + __u32 flags; +#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0) +#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1) +#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2) + /* in */ + __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ + __u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page*/ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane*/ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + union { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; +}; + +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) + +/** + * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32) + * + * Return a new dma-buf file descriptor for an exposed guest framebuffer + * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_ + * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer. + */ + +#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15) + /* -------- API for Type1 VFIO IOMMU -------- */ /** From e546e281d33d1fc275651aa06f0659045db67e68 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:36 +0800 Subject: [PATCH 07/87] drm/i915/gvt: Dmabuf support for GVT-g This patch introduces a guest's framebuffer sharing mechanism based on dma-buf subsystem. With this sharing mechanism, guest's framebuffer can be shared between guest VM and host. v17: - modify VFIO_DEVICE_GET_GFX_DMABUF interface. (Alex) v16: - add x_hot and y_hot. (Gerd) - add flag validation for VFIO_DEVICE_GET_GFX_DMABUF. (Alex) - rebase 4.14.0-rc6. v15: - add VFIO_DEVICE_GET_GFX_DMABUF ABI. (Gerd) - add intel_vgpu_dmabuf_cleanup() to clean up the vGPU's dmabuf. (Gerd) v14: - add PROBE, DMABUF and REGION flags. (Alex) v12: - refine the lifecycle of dmabuf. v9: - remove dma-buf management. (Alex) - track the dma-buf create and release in kernel mode. (Gerd) (Daniel) v8: - refine the dma-buf ioctl definition.(Alex) - add a lock to protect the dmabuf list. (Alex) v7: - release dma-buf related allocations in dma-buf's associated release function. (Alex) - refine ioctl interface for querying plane info or create dma-buf. (Alex) v6: - align the dma-buf life cycle with the vfio device. (Alex) - add the dma-buf related operations in a separate patch. (Gerd) - i915 related changes. (Chris) v5: - fix bug while checking whether the gem obj is gvt's dma-buf when user change caching mode or domains. Add a helper function to do it. (Xiaoguang) - add definition for the query plane and create dma-buf. (Xiaoguang) v4: - fix bug while checking whether the gem obj is gvt's dma-buf when set caching mode or doamins. (Xiaoguang) v3: - declare a new flag I915_GEM_OBJECT_IS_GVT_DMABUF in drm_i915_gem_object to represent the gem obj for gvt's dma-buf. The tiling mode, caching mode and domains can not be changed for this kind of gem object. (Alex) - change dma-buf related information to be more generic. So other vendor can use the same interface. (Alex) v2: - create a management fd for dma-buf operations. (Alex) - alloc gem object's backing storage in gem obj's get_pages() callback. (Chris) Signed-off-by: Tina Zhang Cc: Alex Williamson Cc: Chris Wilson Cc: Daniel Vetter Cc: Gerd Hoffmann Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/Makefile | 2 +- drivers/gpu/drm/i915/gvt/dmabuf.c | 523 +++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/dmabuf.h | 67 ++++ drivers/gpu/drm/i915/gvt/gvt.c | 2 + drivers/gpu/drm/i915/gvt/gvt.h | 11 + drivers/gpu/drm/i915/gvt/hypercall.h | 2 + drivers/gpu/drm/i915/gvt/kvmgt.c | 55 ++- drivers/gpu/drm/i915/gvt/mpt.h | 30 ++ drivers/gpu/drm/i915/gvt/vgpu.c | 5 +- drivers/gpu/drm/i915/i915_gem_object.h | 2 + 10 files changed, 696 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/gvt/dmabuf.c create mode 100644 drivers/gpu/drm/i915/gvt/dmabuf.h diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 54d70df96494..cae06c1dcdcd 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -2,7 +2,7 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o \ - fb_decoder.o + fb_decoder.o dmabuf.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c new file mode 100644 index 000000000000..190710c8d3e5 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -0,0 +1,523 @@ +/* + * Copyright 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhiyuan Lv + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang + */ + +#include +#include +#include + +#include "i915_drv.h" +#include "gvt.h" + +#define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) + +static int vgpu_gem_get_pages( + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + int i, ret; + gen8_pte_t __iomem *gtt_entries; + struct intel_vgpu_fb_info *fb_info; + + fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; + if (WARN_ON(!fb_info)) + return -ENODEV; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (unlikely(!st)) + return -ENOMEM; + + ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL); + if (ret) { + kfree(st); + return ret; + } + gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + (fb_info->start >> PAGE_SHIFT); + for_each_sg(st->sgl, sg, fb_info->size, i) { + sg->offset = 0; + sg->length = PAGE_SIZE; + sg_dma_address(sg) = + GEN8_DECODE_PTE(readq(>t_entries[i])); + sg_dma_len(sg) = PAGE_SIZE; + } + + __i915_gem_object_set_pages(obj, st, PAGE_SIZE); + + return 0; +} + +static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void dmabuf_gem_object_free(struct kref *kref) +{ + struct intel_vgpu_dmabuf_obj *obj = + container_of(kref, struct intel_vgpu_dmabuf_obj, kref); + struct intel_vgpu *vgpu = obj->vgpu; + struct list_head *pos; + + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if (dmabuf_obj == obj) { + idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); + kfree(dmabuf_obj->info); + kfree(dmabuf_obj); + list_del(pos); + break; + } + } +} + + +static inline void dmabuf_obj_get(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_get(&obj->kref); +} + +static inline void dmabuf_obj_put(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_put(&obj->kref, dmabuf_gem_object_free); +} + +static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj) +{ + + struct intel_vgpu_fb_info *fb_info = gem_obj->gvt_info; + struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + + mutex_lock(&vgpu->dmabuf_lock); + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + intel_gvt_hypervisor_put_vfio_device(vgpu); + mutex_unlock(&vgpu->dmabuf_lock); +} + +static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { + .flags = I915_GEM_OBJECT_IS_PROXY, + .get_pages = vgpu_gem_get_pages, + .put_pages = vgpu_gem_put_pages, + .release = vgpu_gem_release, +}; + +static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, + struct intel_vgpu_fb_info *info) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_alloc(dev_priv); + if (obj == NULL) + return NULL; + + drm_gem_private_object_init(dev, &obj->base, + info->size << PAGE_SHIFT); + i915_gem_object_init(obj, &intel_vgpu_gem_ops); + + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + obj->base.write_domain = 0; + if (IS_SKYLAKE(dev_priv)) { + unsigned int tiling_mode = 0; + unsigned int stride = 0; + + switch (info->drm_format_mod << 10) { + case PLANE_CTL_TILED_LINEAR: + tiling_mode = I915_TILING_NONE; + break; + case PLANE_CTL_TILED_X: + tiling_mode = I915_TILING_X; + stride = info->stride; + break; + case PLANE_CTL_TILED_Y: + tiling_mode = I915_TILING_Y; + stride = info->stride; + break; + default: + gvt_dbg_core("not supported tiling mode\n"); + } + obj->tiling_and_stride = tiling_mode | stride; + } else { + obj->tiling_and_stride = info->drm_format_mod ? + I915_TILING_X : 0; + } + + return obj; +} + +static int vgpu_get_plane_info(struct drm_device *dev, + struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *info, + int plane_id) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_vgpu_primary_plane_format p; + struct intel_vgpu_cursor_plane_format c; + int ret; + + if (plane_id == DRM_PLANE_TYPE_PRIMARY) { + ret = intel_vgpu_decode_primary_plane(vgpu, &p); + if (ret) + return ret; + info->start = p.base; + info->start_gpa = p.base_gpa; + info->width = p.width; + info->height = p.height; + info->stride = p.stride; + info->drm_format = p.drm_format; + info->drm_format_mod = p.tiled; + info->size = (((p.stride * p.height * p.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { + ret = intel_vgpu_decode_cursor_plane(vgpu, &c); + if (ret) + return ret; + info->start = c.base; + info->start_gpa = c.base_gpa; + info->width = c.width; + info->height = c.height; + info->stride = c.width * (c.bpp / 8); + info->drm_format = c.drm_format; + info->drm_format_mod = 0; + info->x_pos = c.x_pos; + info->y_pos = c.y_pos; + + /* The invalid cursor hotspot value is delivered to host + * until we find a way to get the cursor hotspot info of + * guest OS. + */ + info->x_hot = UINT_MAX; + info->y_hot = UINT_MAX; + info->size = (((info->stride * c.height * c.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else { + gvt_vgpu_err("invalid plane id:%d\n", plane_id); + return -EINVAL; + } + + if (info->size == 0) { + gvt_vgpu_err("fb size is zero\n"); + return -EINVAL; + } + + if (info->start & (PAGE_SIZE - 1)) { + gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start); + return -EFAULT; + } + if (((info->start >> PAGE_SHIFT) + info->size) > + ggtt_total_entries(&dev_priv->ggtt)) { + gvt_vgpu_err("Invalid GTT offset or size\n"); + return -EFAULT; + } + + if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) { + gvt_vgpu_err("invalid gma addr\n"); + return -EFAULT; + } + + return 0; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_info(struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *latest_info) +{ + struct list_head *pos; + struct intel_vgpu_fb_info *fb_info; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if ((dmabuf_obj == NULL) || + (dmabuf_obj->info == NULL)) + continue; + + fb_info = (struct intel_vgpu_fb_info *)dmabuf_obj->info; + if ((fb_info->start == latest_info->start) && + (fb_info->start_gpa == latest_info->start_gpa) && + (fb_info->size == latest_info->size) && + (fb_info->drm_format_mod == latest_info->drm_format_mod) && + (fb_info->drm_format == latest_info->drm_format) && + (fb_info->width == latest_info->width) && + (fb_info->height == latest_info->height)) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_num(struct intel_vgpu *vgpu, u32 id) +{ + struct list_head *pos; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if (!dmabuf_obj) + continue; + + if (dmabuf_obj->dmabuf_id == id) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, + struct intel_vgpu_fb_info *fb_info) +{ + gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->width = fb_info->width; + gvt_dmabuf->height = fb_info->height; + gvt_dmabuf->stride = fb_info->stride; + gvt_dmabuf->size = fb_info->size; + gvt_dmabuf->x_pos = fb_info->x_pos; + gvt_dmabuf->y_pos = fb_info->y_pos; + gvt_dmabuf->x_hot = fb_info->x_hot; + gvt_dmabuf->y_hot = fb_info->y_hot; +} + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct vfio_device_gfx_plane_info *gfx_plane_info = args; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct intel_vgpu_fb_info fb_info; + int ret = 0; + + if (gfx_plane_info->flags == (VFIO_GFX_PLANE_TYPE_DMABUF | + VFIO_GFX_PLANE_TYPE_PROBE)) + return ret; + else if ((gfx_plane_info->flags & ~VFIO_GFX_PLANE_TYPE_DMABUF) || + (!gfx_plane_info->flags)) + return -EINVAL; + + ret = vgpu_get_plane_info(dev, vgpu, &fb_info, + gfx_plane_info->drm_plane_type); + if (ret != 0) + goto out; + + mutex_lock(&vgpu->dmabuf_lock); + /* If exists, pick up the exposed dmabuf_obj */ + dmabuf_obj = pick_dmabuf_by_info(vgpu, &fb_info); + if (dmabuf_obj) { + update_fb_info(gfx_plane_info, &fb_info); + gfx_plane_info->dmabuf_id = dmabuf_obj->dmabuf_id; + + /* This buffer may be released between query_plane ioctl and + * get_dmabuf ioctl. Add the refcount to make sure it won't + * be released between the two ioctls. + */ + if (!dmabuf_obj->initref) { + dmabuf_obj->initref = true; + dmabuf_obj_get(dmabuf_obj); + } + ret = 0; + gvt_dbg_dpy("vgpu%d: re-use dmabuf_obj ref %d, id %d\n", + vgpu->id, kref_read(&dmabuf_obj->kref), + gfx_plane_info->dmabuf_id); + mutex_unlock(&vgpu->dmabuf_lock); + goto out; + } + + mutex_unlock(&vgpu->dmabuf_lock); + + /* Need to allocate a new one*/ + dmabuf_obj = kmalloc(sizeof(struct intel_vgpu_dmabuf_obj), GFP_KERNEL); + if (unlikely(!dmabuf_obj)) { + gvt_vgpu_err("alloc dmabuf_obj failed\n"); + ret = -ENOMEM; + goto out; + } + + dmabuf_obj->info = kmalloc(sizeof(struct intel_vgpu_fb_info), + GFP_KERNEL); + if (unlikely(!dmabuf_obj->info)) { + gvt_vgpu_err("allocate intel vgpu fb info failed\n"); + ret = -ENOMEM; + goto out_free_dmabuf; + } + memcpy(dmabuf_obj->info, &fb_info, sizeof(struct intel_vgpu_fb_info)); + + ((struct intel_vgpu_fb_info *)dmabuf_obj->info)->obj = dmabuf_obj; + + dmabuf_obj->vgpu = vgpu; + + ret = idr_alloc(&vgpu->object_idr, dmabuf_obj, 1, 0, GFP_NOWAIT); + if (ret < 0) + goto out_free_info; + gfx_plane_info->dmabuf_id = ret; + dmabuf_obj->dmabuf_id = ret; + + dmabuf_obj->initref = true; + + kref_init(&dmabuf_obj->kref); + + mutex_lock(&vgpu->dmabuf_lock); + if (intel_gvt_hypervisor_get_vfio_device(vgpu)) { + gvt_vgpu_err("get vfio device failed\n"); + mutex_unlock(&vgpu->dmabuf_lock); + goto out_free_info; + } + mutex_unlock(&vgpu->dmabuf_lock); + + update_fb_info(gfx_plane_info, &fb_info); + + INIT_LIST_HEAD(&dmabuf_obj->list); + mutex_lock(&vgpu->dmabuf_lock); + list_add_tail(&dmabuf_obj->list, &vgpu->dmabuf_obj_list_head); + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: %s new dmabuf_obj ref %d, id %d\n", vgpu->id, + __func__, kref_read(&dmabuf_obj->kref), ret); + + return 0; + +out_free_info: + kfree(dmabuf_obj->info); +out_free_dmabuf: + kfree(dmabuf_obj); +out: + /* ENODEV means plane isn't ready, which might be a normal case. */ + return (ret == -ENODEV) ? 0 : ret; +} + +/* To associate an exposed dmabuf with the dmabuf_obj */ +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + int dmabuf_fd; + int ret = 0; + + mutex_lock(&vgpu->dmabuf_lock); + + dmabuf_obj = pick_dmabuf_by_num(vgpu, dmabuf_id); + if (dmabuf_obj == NULL) { + gvt_vgpu_err("invalid dmabuf id:%d\n", dmabuf_id); + ret = -EINVAL; + goto out; + } + + obj = vgpu_create_gem(dev, dmabuf_obj->info); + if (obj == NULL) { + gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id); + ret = -ENOMEM; + goto out; + } + + obj->gvt_info = dmabuf_obj->info; + + dmabuf = i915_gem_prime_export(dev, &obj->base, DRM_CLOEXEC | DRM_RDWR); + if (IS_ERR(dmabuf)) { + gvt_vgpu_err("export dma-buf failed\n"); + ret = PTR_ERR(dmabuf); + goto out_free_gem; + } + obj->base.dma_buf = dmabuf; + + i915_gem_object_put(obj); + + ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR); + if (ret < 0) { + gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret); + goto out_free_dmabuf; + } + dmabuf_fd = ret; + + if (intel_gvt_hypervisor_get_vfio_device(vgpu)) { + gvt_vgpu_err("get vfio device failed\n"); + put_unused_fd(ret); + goto out_free_dmabuf; + } + + dmabuf_obj_get(dmabuf_obj); + + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: dmabuf:%d, dmabuf ref %d, fd:%d\n" + " file count: %ld, GEM ref: %d\n", + vgpu->id, dmabuf_obj->dmabuf_id, + kref_read(&dmabuf_obj->kref), + dmabuf_fd, + file_count(dmabuf->file), + kref_read(&obj->base.refcount)); + + return dmabuf_fd; + +out_free_dmabuf: + dma_buf_put(dmabuf); +out_free_gem: + i915_gem_object_put(obj); +out: + mutex_unlock(&vgpu->dmabuf_lock); + return ret; +} + +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) +{ + struct list_head *pos, *n; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + mutex_lock(&vgpu->dmabuf_lock); + list_for_each_safe(pos, n, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + } + mutex_unlock(&vgpu->dmabuf_lock); +} diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.h b/drivers/gpu/drm/i915/gvt/dmabuf.h new file mode 100644 index 000000000000..5f8f03fb1d1b --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.h @@ -0,0 +1,67 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Zhiyuan Lv + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang + */ + +#ifndef _GVT_DMABUF_H_ +#define _GVT_DMABUF_H_ +#include + +struct intel_vgpu_fb_info { + __u64 start; + __u64 start_gpa; + __u64 drm_format_mod; + __u32 drm_format; /* drm format of plane */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page */ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane */ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + struct intel_vgpu_dmabuf_obj *obj; +}; + +/** + * struct intel_vgpu_dmabuf_obj- Intel vGPU device buffer object + */ +struct intel_vgpu_dmabuf_obj { + struct intel_vgpu *vgpu; + struct intel_vgpu_fb_info *info; + __u32 dmabuf_id; + struct kref kref; + bool initref; + struct list_head list; +}; + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args); +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id); +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 3a74a408a966..9a5dce3aa10a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -181,6 +181,8 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_deactivate = intel_gvt_deactivate_vgpu, .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, .get_gvt_attrs = intel_get_gvt_attrs, + .vgpu_query_plane = intel_vgpu_query_plane, + .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 357d458cded0..77df9bad5dea 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -47,6 +47,7 @@ #include "render.h" #include "cmd_parser.h" #include "fb_decoder.h" +#include "dmabuf.h" #define GVT_MAX_VGPU 8 @@ -209,8 +210,16 @@ struct intel_vgpu { struct kvm *kvm; struct work_struct release_work; atomic_t released; + struct vfio_device *vfio_device; } vdev; #endif + + struct list_head dmabuf_obj_list_head; + struct mutex dmabuf_lock; + struct idr object_idr; + + struct completion vblank_done; + }; /* validating GM healthy status*/ @@ -536,6 +545,8 @@ struct intel_gvt_ops { const char *name); bool (*get_gvt_attrs)(struct attribute ***type_attrs, struct attribute_group ***intel_vgpu_type_groups); + int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); + int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); }; diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 32c345c3fa27..a1bd82feb827 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -56,6 +56,8 @@ struct intel_gvt_mpt { int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); int (*set_opregion)(void *vgpu); + int (*get_vfio_device)(void *vgpu); + void (*put_vfio_device)(void *vgpu); }; extern struct intel_gvt_mpt xengt_mpt; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e0cda45ac6c2..b8a85e08091a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -377,10 +377,23 @@ static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags; vgpu->vdev.region[vgpu->vdev.num_regions].data = data; vgpu->vdev.num_regions++; - return 0; } +static int kvmgt_get_vfio_device(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + vgpu->vdev.vfio_device = vfio_device_get_from_dev( + mdev_dev(vgpu->vdev.mdev)); + if (!vgpu->vdev.vfio_device) { + gvt_vgpu_err("failed to get vfio device\n"); + return -ENODEV; + } + return 0; +} + + static int kvmgt_set_opregion(void *p_vgpu) { struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; @@ -409,6 +422,14 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } +static void kvmgt_put_vfio_device(void *vgpu) +{ + if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) + return; + + vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device); +} + static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { struct intel_vgpu *vgpu = NULL; @@ -1146,6 +1167,33 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_RESET) { intel_gvt_ops->vgpu_reset(vgpu); return 0; + } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { + struct vfio_device_gfx_plane_info dmabuf; + int ret = 0; + + minsz = offsetofend(struct vfio_device_gfx_plane_info, + dmabuf_id); + if (copy_from_user(&dmabuf, (void __user *)arg, minsz)) + return -EFAULT; + if (dmabuf.argsz < minsz) + return -EINVAL; + + ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf); + if (ret != 0) + return ret; + + return copy_to_user((void __user *)arg, &dmabuf, minsz) ? + -EFAULT : 0; + } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) { + __u32 dmabuf_id; + __s32 dmabuf_fd; + + if (get_user(dmabuf_id, (__u32 __user *)arg)) + return -EFAULT; + + dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id); + return dmabuf_fd; + } return 0; @@ -1387,6 +1435,9 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvmgt_protect_table_init(info); gvt_cache_init(vgpu); + mutex_init(&vgpu->dmabuf_lock); + init_completion(&vgpu->vblank_done); + info->track_node.track_write = kvmgt_page_track_write; info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); @@ -1528,6 +1579,8 @@ struct intel_gvt_mpt kvmgt_mpt = { .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, .set_opregion = kvmgt_set_opregion, + .get_vfio_device = kvmgt_get_vfio_device, + .put_vfio_device = kvmgt_put_vfio_device, }; EXPORT_SYMBOL_GPL(kvmgt_mpt); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index c99e7964731c..ca8005a6d5fa 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -309,4 +309,34 @@ static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) return intel_gvt_host.mpt->set_opregion(vgpu); } +/** + * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_get_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->get_vfio_device) + return 0; + + return intel_gvt_host.mpt->get_vfio_device(vgpu); +} + +/** + * intel_gvt_hypervisor_put_vfio_device - decrease vfio device ref count + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline void intel_gvt_hypervisor_put_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->put_vfio_device) + return; + + intel_gvt_host.mpt->put_vfio_device(vgpu); +} + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index dcdd72260cc9..39926176fbeb 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -236,6 +236,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) } intel_vgpu_stop_schedule(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&gvt->lock); } @@ -265,6 +266,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) intel_gvt_hypervisor_detach_vgpu(vgpu); intel_vgpu_free_resource(vgpu); intel_vgpu_clean_mmio(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); vfree(vgpu); intel_gvt_update_vgpu_types(gvt); @@ -349,7 +351,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; - + INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); + idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); ret = intel_vgpu_init_mmio(vgpu); diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 19fb28c177d8..05e89e1c0a08 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -261,6 +261,8 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + + void *gvt_info; }; /** for phys allocated objects */ From dfb6ae4e14bd6542ee6caa8f661f269c94e19a9f Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Thu, 23 Nov 2017 16:26:37 +0800 Subject: [PATCH 08/87] drm/i915/gvt: Handle orphan dmabuf_objs dmabuf_obj's destruction relys on GEM release operation, which is managed in i915 driver. And there is a time window between vgpu's destruction and its dmabuf_objs' destruction. This patch is to free the orphan dmabuf_objs correctly after the vgpu passes away. Signed-off-by: Tina Zhang Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 56 +++++++++++++++++++------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 190710c8d3e5..50215b26b6c8 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -88,19 +88,26 @@ static void dmabuf_gem_object_free(struct kref *kref) container_of(kref, struct intel_vgpu_dmabuf_obj, kref); struct intel_vgpu *vgpu = obj->vgpu; struct list_head *pos; - struct intel_vgpu_dmabuf_obj *dmabuf_obj; - list_for_each(pos, &vgpu->dmabuf_obj_list_head) { - dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, - list); - if (dmabuf_obj == obj) { - idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); - kfree(dmabuf_obj->info); - kfree(dmabuf_obj); - list_del(pos); - break; + if (vgpu) { + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, + struct intel_vgpu_dmabuf_obj, list); + if (dmabuf_obj == obj) { + intel_gvt_hypervisor_put_vfio_device(vgpu); + idr_remove(&vgpu->object_idr, + dmabuf_obj->dmabuf_id); + kfree(dmabuf_obj->info); + kfree(dmabuf_obj); + list_del(pos); + break; + } } + } else { + /* Free the orphan dmabuf_objs here */ + kfree(obj->info); + kfree(obj); } } @@ -122,11 +129,16 @@ static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj) struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; struct intel_vgpu *vgpu = obj->vgpu; - mutex_lock(&vgpu->dmabuf_lock); - gem_obj->base.dma_buf = NULL; - dmabuf_obj_put(obj); - intel_gvt_hypervisor_put_vfio_device(vgpu); - mutex_unlock(&vgpu->dmabuf_lock); + if (vgpu) { + mutex_lock(&vgpu->dmabuf_lock); + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + mutex_unlock(&vgpu->dmabuf_lock); + } else { + /* vgpu is NULL, as it has been removed already */ + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + } } static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { @@ -471,12 +483,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) } dmabuf_fd = ret; - if (intel_gvt_hypervisor_get_vfio_device(vgpu)) { - gvt_vgpu_err("get vfio device failed\n"); - put_unused_fd(ret); - goto out_free_dmabuf; - } - dmabuf_obj_get(dmabuf_obj); if (dmabuf_obj->initref) { @@ -518,6 +524,14 @@ void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) dmabuf_obj->initref = false; dmabuf_obj_put(dmabuf_obj); } + + idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); + + if (dmabuf_obj->vgpu) + intel_gvt_hypervisor_put_vfio_device(vgpu); + + dmabuf_obj->vgpu = NULL; + } mutex_unlock(&vgpu->dmabuf_lock); } From 4a136d590bd4c579bb3df154363a3e5b51cb36ce Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Tue, 28 Nov 2017 13:50:42 +0800 Subject: [PATCH 09/87] drm/i915/gvt: Introduce KBL to dma-buf on Gvt-g This patch introduces KBL platform to dma-buf on Gvt-g. Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 2 +- drivers/gpu/drm/i915/gvt/fb_decoder.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 50215b26b6c8..5f39e90f961c 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -164,7 +164,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, obj->base.read_domains = I915_GEM_DOMAIN_GTT; obj->base.write_domain = 0; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { unsigned int tiling_mode = 0; unsigned int stride = 0; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 6c99c64d8e14..72f42176f35c 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -150,7 +150,7 @@ static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, u32 stride_reg = vgpu_vreg(vgpu, DSPSTRIDE(pipe)) & stride_mask; u32 stride = stride_reg; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { switch (tiled) { case PLANE_CTL_TILED_LINEAR: stride = stride_reg * 64; @@ -214,7 +214,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, if (!plane->enabled) return -ENODEV; - if (IS_SKYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { plane->tiled = (val & PLANE_CTL_TILED_MASK) >> _PLANE_CTL_TILED_SHIFT; fmt = skl_format_to_drm( @@ -253,8 +253,9 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, } plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), - (IS_SKYLAKE(dev_priv)) ? (_PRI_PLANE_STRIDE_MASK >> 6) : - _PRI_PLANE_STRIDE_MASK, plane->bpp); + (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) ? + (_PRI_PLANE_STRIDE_MASK >> 6) : + _PRI_PLANE_STRIDE_MASK, plane->bpp); plane->width = (vgpu_vreg(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> _PIPE_H_SRCSZ_SHIFT; From 82a3b6701ab1e2cdfb64cf163fc6d1a4576329ff Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 29 Nov 2017 14:57:37 +0800 Subject: [PATCH 10/87] drm/i915/gvt: Free dmabuf_obj list in intel_vgpu_dmabuf_cleanup The per vGPU dmabuf_obj list should be released in intel_vgpu_dmabuf_ cleanup, which is invoked either in the process of closing a VM or in the process of removing a vGPU. Fixes: e3a0d7976c53 ("drm/i915/gvt: Handle orphan dmabuf_objs") Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 5f39e90f961c..9c40a67ecdd6 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -90,7 +90,7 @@ static void dmabuf_gem_object_free(struct kref *kref) struct list_head *pos; struct intel_vgpu_dmabuf_obj *dmabuf_obj; - if (vgpu) { + if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) { list_for_each(pos, &vgpu->dmabuf_obj_list_head) { dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, list); @@ -530,6 +530,7 @@ void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) if (dmabuf_obj->vgpu) intel_gvt_hypervisor_put_vfio_device(vgpu); + list_del(pos); dmabuf_obj->vgpu = NULL; } From 59a716c6477c2a095adf274e8f76b9889af7bc7b Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 29 Nov 2017 15:40:06 +0800 Subject: [PATCH 11/87] drm/i915/gvt: Convert macro queue_workload to a function Convert the macro to a function which should always be preferred. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 11 +++++++++++ drivers/gpu/drm/i915/gvt/scheduler.h | 7 +------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index fa4929584744..769c1c24ae75 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -458,7 +458,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, gvt_dbg_el("workload %p emulate schedule_in %d\n", workload, emulate_schedule_in); - queue_workload(workload); + intel_vgpu_queue_workload(workload); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 13ccc00f0d40..abf71be092f8 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1328,3 +1328,14 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, return workload; } + +/** + * intel_vgpu_queue_workload - Qeue a vGPU workload + * @workload: the workload to queue in + */ +void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) +{ + list_add_tail(&workload->list, + workload_q_head(workload->vgpu, workload->ring_id)); + wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->ring_id]); +} diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index e4a9f9acd4a9..3de77dfa7c59 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,12 +125,7 @@ struct intel_vgpu_shadow_bb { #define workload_q_head(vgpu, ring_id) \ (&(vgpu->submission.workload_q_head[ring_id])) -#define queue_workload(workload) do { \ - list_add_tail(&workload->list, \ - workload_q_head(workload->vgpu, workload->ring_id)); \ - wake_up(&workload->vgpu->gvt-> \ - scheduler.waitq[workload->ring_id]); \ -} while (0) +void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload); int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt); From c130456cefd4cf9531f12f3e9b23805d34706ac0 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 29 Nov 2017 15:40:07 +0800 Subject: [PATCH 12/87] drm/i915/gvt: Kick scheduler when new workload queued The current schedule policy rely on a 1ms timer to execute workload. This can introduce maximum 1ms unnecessary latency. This is especially bad for small media workloads. And I don't think we need this timer for QoS, but the change is not simply remove the code. So I made a new API intel_gvt_kick_schedule() for future change. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 5 +++++ drivers/gpu/drm/i915/gvt/sched_policy.h | 2 ++ drivers/gpu/drm/i915/gvt/scheduler.c | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 03532dfc0cd5..eea1a2f92099 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -372,6 +372,11 @@ void intel_vgpu_start_schedule(struct intel_vgpu *vgpu) vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu); } +void intel_gvt_kick_schedule(struct intel_gvt *gvt) +{ + intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); +} + void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) { struct intel_gvt_workload_scheduler *scheduler = diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index ba00a5f7455f..7b59e3e88b8b 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -57,4 +57,6 @@ void intel_vgpu_start_schedule(struct intel_vgpu *vgpu); void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu); +void intel_gvt_kick_schedule(struct intel_gvt *gvt); + #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index abf71be092f8..76d2812f2f03 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1337,5 +1337,6 @@ void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) { list_add_tail(&workload->list, workload_q_head(workload->vgpu, workload->ring_id)); + intel_gvt_kick_schedule(workload->vgpu->gvt); wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->ring_id]); } From da5f99eaccc10e30bf82eb02b1be74703b878720 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 1 Dec 2017 14:59:53 +0800 Subject: [PATCH 13/87] drm/i915/gvt: Don't mark vgpu context as inactive when preempted We shouldn't mark inactive for vGPU context if preempted, which would still be re-scheduled later. So keep active state. Fixes: d6c0511300dc ("drm/i915/execlists: Distinguish the incomplete context notifies") Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 76d2812f2f03..9100aebcb105 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -188,10 +188,12 @@ static int shadow_context_status_change(struct notifier_block *nb, atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: - case INTEL_CONTEXT_SCHEDULE_PREEMPTED: save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); + break; default: WARN_ON(1); return NOTIFY_OK; From 1603660b3342269c95fcafee1945790342a8c28e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 4 Dec 2017 10:42:58 +0800 Subject: [PATCH 14/87] drm/i915/gvt: set max priority for gvt context This is to workaround guest driver hang regression after preemption enable that gvt hasn't enabled handling of that for guest workload. So in effect this disables preemption for gvt context now. Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 9100aebcb105..ab9a500ba3e9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1038,6 +1038,9 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); + if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) + s->shadow_ctx->priority = INT_MAX; + bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create("gvt-g_vgpu_workload", From 006c23327f8de8575508c458131b304188d426f7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 3 Dec 2017 15:36:20 -0800 Subject: [PATCH 15/87] documentation/gpu/i915: fix docs build error after file rename Fix documentation build errors after intel_guc_loader.c was renamed to intel_guc_fw.c. Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -function GuC-specific firmware loader ../drivers/gpu/drm/i915/intel_guc_loader.c' failed with return code 1 Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -internal ../drivers/gpu/drm/i915/intel_guc_loader.c' failed with return code 2 Fixes: e8668bbcb0f9 ("drm/i915/guc: Rename intel_guc_loader.c to intel_guc_fw.c") Signed-off-by: Randy Dunlap Cc: Michal Wajdeczko Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1b214f53-47f5-bef3-f58e-8136de5678ed@infradead.org --- Documentation/gpu/i915.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 21577eabaf78..84021142a8f3 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,10 +341,10 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :doc: GuC-specific firmware loader -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: GuC-based command submission From 53ff2641a817099e1c6d1aef409ba004c3a9f1ea Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 13 Nov 2017 23:34:55 +0000 Subject: [PATCH 16/87] drm/i915/cnl: only divide up base frequency with crystal source We apply this logic to Gen9 as well. We didn't notice this issue as most part we've encountered so far only use the crystal as source for their timestamp registers. Fixes: dab9178333 ("drm/i915: expose command stream timestamp frequency to userspace") Reviewed-by: Paulo Zanoni Signed-off-by: Lionel Landwerlin Signed-off-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20171113233455.12085-5-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 02f8bf101ccd..405d70124a46 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -403,15 +403,15 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) freq = f24_mhz; break; } - } - /* Now figure out how the command stream's timestamp register - * increments from this frequency (it might increment only - * every few clock cycle). - */ - freq >>= 3 - ((rpm_config_reg & - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((rpm_config_reg & + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } return freq; } From f745e9cc7e40c4570ab5e8d5ef32bfaa6e8ced46 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 4 Dec 2017 16:48:18 +0000 Subject: [PATCH 17/87] drm/i915/gvt: Add missing breaks in switch statement The switch statement is missing breaks for the cases of GVT_FAILSAFE_INSUFFICIENT_RESOURCE and GVT_FAILSAFE_GUEST_ERR. Add them in. Detected by CoverityScan, CID#1462416 ("Missing break in switch") Fixes: e011c6ce2b4f ("drm/i915/gvt: Add VM healthy check for workload_thread") Fixes: a33fc7a0482a ("drm/i915/gvt: enter failsafe mode when guest requires more resources") Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 880448d4adc7..54f5eac8bcc3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -174,8 +174,10 @@ void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) break; case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: pr_err("Graphics resource is not enough for the guest\n"); + break; case GVT_FAILSAFE_GUEST_ERR: pr_err("GVT Internal error for the guest\n"); + break; default: break; } From 8e60b7f195d2536b2f090ae97f74ed19a504d60c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 4 Dec 2017 17:21:06 +0000 Subject: [PATCH 18/87] drm/i915/gvt: fix off-by-one comparison of ring_id The ring_id maximum boundary is being compared using the > operator instead of >=, leading to an off-by-one error and an out of bounds write into array vgpu->hws_pga[]. Fix this by simply using the correct comparison operator. Also re-work another comparison that uses the comparison > I915_NUM_ENGINES - 1 to use the >= idiom using to keep this consistent in this code. Detected by CoverityScan, CID#1462404 ("Out-of-bounds write") Fixes: a2ae95af9646 ("drm/i915/gvt: update CSB and CSB write pointer in virtual HWSP") Signed-off-by: Colin Ian King Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 54f5eac8bcc3..6f95bc04f0f0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1398,7 +1398,7 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, * update the VM CSB status correctly. Here listed registers can * support BDW, SKL or other platforms with same HWSP registers. */ - if (unlikely(ring_id < 0 || ring_id > I915_NUM_ENGINES)) { + if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", vgpu->id, offset); return -EINVAL; @@ -1507,7 +1507,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, u32 data = *(u32 *)p_data; int ret = 0; - if (WARN_ON(ring_id < 0 || ring_id > I915_NUM_ENGINES - 1)) + if (WARN_ON(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) return -EINVAL; execlist = &vgpu->submission.execlist[ring_id]; From 39ccc9852e2b46964c9c44eba52db57413ba6d27 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 9 Nov 2017 17:18:32 -0800 Subject: [PATCH 19/87] drm/i915/skl: DMC firmware for skylake v1.27 There is a new version of dmc available for skylake. Following additions from ver1.27 1. Fix for the issue where DC_STATE was getting enabled even when disabled by driver causing data corruption. Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Imre Deak Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/1510276712-29590-2-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 07e4f7bc4412..7fe4aac0facc 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -44,9 +44,9 @@ MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_27.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 27) #define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); From 5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Dec 2017 13:25:13 +0000 Subject: [PATCH 20/87] drm/i915: Flush pending GTT writes before unbinding From the shrinker paths, we want to relinquish the GPU and GGTT access to the object, releasing the backing storage back to the system for swapout. As a part of that process we would unpin the pages, marking them for access by the CPU (for the swapout/swapin). However, if that process was interrupted after unbind the vma, we missed a flush of the inflight GGTT writes before we made that GTT space available again for reuse, with the prospect that we would redirect them to another page. The bug dates back to the introduction of multiple GGTT vma, but the code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()"). Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()") Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171204132513.7303-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e083f242b8dc..80b78fb5daac 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { From 107783d00a067cd3e2ebf4564b1b8c03915186e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Dec 2017 17:27:57 +0000 Subject: [PATCH 21/87] drm/i915: Taint (TAINT_WARN) the kernel if the GPU reset fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit History tells us that if we cannot reset the GPU now, we never will. This then impacts everything that is run subsequently. On failing the reset, we mark the driver as wedged, trying to prevent further execution on the GPU, forcing userspace to fallback to using the CPU to update its framebuffers and let the user know what happened. We also want to go one step further and add a taint to the kernel so that any subsequent faults can be traced back to this failure. This is useful for CI, where if the GPU/driver fails we want to reboot and restart testing rather than continue on into oblivion. For everyone else, the warning taint is a testament to the system unreliability. TAINT_WARN is used anytime a WARN() is emitted, which is suitable for our purposes here as well; the driver/system may behave unexpectedly after the failure. v2: Also taint if the recovery fails (again history shows us that is typically fatal). v3: Use TAINT_WARN References: https://bugs.freedesktop.org/show_bug.cgi?id=103514 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Michał Winiarski Reviewed-by: Joonas Lahtinen Acked-by: Tomi Sarvela Link: https://patchwork.freedesktop.org/patch/msgid/20171205172757.32609-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 962d7c7c799c..b2e210b77457 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1897,9 +1897,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) disable_irq(i915->drm.irq); ret = i915_gem_reset_prepare(i915); if (ret) { - DRM_ERROR("GPU recovery failed\n"); + dev_err(i915->drm.dev, "GPU recovery failed\n"); intel_gpu_reset(i915, ALL_ENGINES); - goto error; + goto taint; } if (!intel_has_gpu_reset(i915)) { @@ -1916,7 +1916,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) } if (ret) { dev_err(i915->drm.dev, "Failed to reset chip\n"); - goto error; + goto taint; } i915_gem_reset(i915); @@ -1959,6 +1959,20 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) wake_up_bit(&error->flags, I915_RESET_HANDOFF); return; +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); i915_gem_retire_requests(i915); From e0403cb9442c6d452459f4be70f59b20482268e3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2017 11:01:17 -0800 Subject: [PATCH 22/87] drm/i915: follow single notation for workaround number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Allow to have or omit space before platform Cc: Ville Syrjälä Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171205190118.7088-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 9d5e72728475..691600ce48c4 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1380,7 +1380,7 @@ static bool hdmi_12bpc_possible(const struct intel_crtc_state *crtc_state) } } - /* Display Wa #1139 */ + /* Display WA #1139 */ if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && crtc_state->base.adjusted_mode.htotal > 5460) return false; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 67f326230a7e..07ee5ad5a13f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -58,7 +58,7 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) { /* * WaCompressedResourceDisplayNewHashMode:skl,kbl - * Display WA#0390: skl,kbl + * Display WA #0390: skl,kbl * * Must match Sampler, Pixel Back End, and Media. See * WaCompressedResourceSamplerPbeMediaNewHashMode. @@ -8417,7 +8417,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Wa #1181 */ + /* Display WA #1181 */ I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } From 2abf3c0da99648861e900a2b264b633b9cbca6fa Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 5 Dec 2017 11:01:18 -0800 Subject: [PATCH 23/87] drm/i915: add platform tag to WA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: add more missing platform tags v3: change tag to cnp rather than using gen9,gen10 Cc: Ville Syrjälä Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171205190118.7088-2-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 691600ce48c4..c42a6c672b73 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1380,7 +1380,7 @@ static bool hdmi_12bpc_possible(const struct intel_crtc_state *crtc_state) } } - /* Display WA #1139 */ + /* Display WA #1139: glk */ if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && crtc_state->base.adjusted_mode.htotal > 5460) return false; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 07ee5ad5a13f..5836181d6f8a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8417,7 +8417,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Display WA #1181 */ + /* Display WA #1181: cnp */ I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } From 2fe2d4e386f48e7cd91adea65156beeaa92d8fe7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:10 +0000 Subject: [PATCH 24/87] drm/i915/huc: Move firmware selection to init_early Doing HuC firmware path selection from sanitize_options function is not perfect, while there is no problem with doing so during early init stage as we already have all needed data. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/intel_huc.c | 60 ++++++++++++++++++++------------ drivers/gpu/drm/i915/intel_huc.h | 2 +- drivers/gpu/drm/i915/intel_uc.c | 4 +-- 4 files changed, 42 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 594fd14e66c5..bd4eea51c9ed 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3234,6 +3234,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) + +/* For now, anything with a GuC has also HuC */ +#define HAS_HUC(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 98d17254593c..6d0e050ab7d9 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -77,42 +77,56 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); #define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ GLK_HUC_FW_MINOR, GLK_BLD_NUM) -/** - * intel_huc_select_fw() - selects HuC firmware for loading - * @huc: intel_huc struct - */ -void intel_huc_select_fw(struct intel_huc *huc) +static void huc_fw_select(struct intel_uc_fw *huc_fw) { + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); struct drm_i915_private *dev_priv = huc_to_i915(huc); - intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + if (!HAS_HUC(dev_priv)) + return; if (i915_modparams.huc_firmware_path) { - huc->fw.path = i915_modparams.huc_firmware_path; - huc->fw.major_ver_wanted = 0; - huc->fw.minor_ver_wanted = 0; + huc_fw->path = i915_modparams.huc_firmware_path; + huc_fw->major_ver_wanted = 0; + huc_fw->minor_ver_wanted = 0; } else if (IS_SKYLAKE(dev_priv)) { - huc->fw.path = I915_SKL_HUC_UCODE; - huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; + huc_fw->path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - huc->fw.path = I915_BXT_HUC_UCODE; - huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; + huc_fw->path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc->fw.path = I915_KBL_HUC_UCODE; - huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + huc_fw->path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; } else if (IS_GEMINILAKE(dev_priv)) { - huc->fw.path = I915_GLK_HUC_UCODE; - huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; + huc_fw->path = I915_GLK_HUC_UCODE; + huc_fw->major_ver_wanted = GLK_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = GLK_HUC_FW_MINOR; } else { - DRM_ERROR("No HuC firmware known for platform with HuC!\n"); - return; + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(huc_fw->type)); } } +/** + * intel_huc_init_early() - initializes HuC struct + * @huc: intel_huc struct + * + * On platforms with HuC selects firmware for uploading + */ +void intel_huc_init_early(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; + + intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + huc_fw_select(huc_fw); +} + /** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index aaa38b9e5817..3d757bce9ce3 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -34,7 +34,7 @@ struct intel_huc { /* HuC-specific additions */ }; -void intel_huc_select_fw(struct intel_huc *huc); +void intel_huc_init_early(struct intel_huc *huc); void intel_huc_init_hw(struct intel_huc *huc); void intel_huc_auth(struct intel_huc *huc); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 1e2a30a40ede..95b524c9f8f3 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -65,9 +65,6 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) /* Verify firmware version */ if (i915_modparams.enable_guc_loading) { - if (HAS_HUC_UCODE(dev_priv)) - intel_huc_select_fw(&dev_priv->huc); - if (intel_guc_fw_select(&dev_priv->guc)) i915_modparams.enable_guc_loading = 0; } @@ -84,6 +81,7 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) void intel_uc_init_early(struct drm_i915_private *dev_priv) { intel_guc_init_early(&dev_priv->guc); + intel_huc_init_early(&dev_priv->huc); } void intel_uc_init_fw(struct drm_i915_private *dev_priv) From 0dd940c8cec444d1864788b654280f96863f6d73 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:11 +0000 Subject: [PATCH 25/87] drm/i915/guc: Move firmware selection to init_early Doing GuC firmware path selection from sanitize_options function is not perfect, while there is no problem with doing so during early init stage as we already have all needed data. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 1 + drivers/gpu/drm/i915/intel_guc_fw.c | 63 ++++++++++++++++------------- drivers/gpu/drm/i915/intel_guc_fw.h | 2 +- drivers/gpu/drm/i915/intel_uc.c | 2 +- drivers/gpu/drm/i915/intel_uc_fw.h | 5 +++ 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index d08e760252d4..df8690738369 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -61,6 +61,7 @@ void intel_guc_init_send_regs(struct intel_guc *guc) void intel_guc_init_early(struct intel_guc *guc) { + intel_guc_fw_init_early(guc); intel_guc_ct_init_early(&guc->ct); mutex_init(&guc->send_mutex); diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 89862fa8ab42..cbc51c960425 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -56,45 +56,54 @@ MODULE_FIRMWARE(I915_KBL_GUC_UCODE); #define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) -/** - * intel_guc_fw_select() - selects GuC firmware for uploading - * - * @guc: intel_guc struct - * - * Return: zero when we know firmware, non-zero in other case - */ -int intel_guc_fw_select(struct intel_guc *guc) +static void guc_fw_select(struct intel_uc_fw *guc_fw) { + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); - intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); + + if (!HAS_GUC(dev_priv)) + return; if (i915_modparams.guc_firmware_path) { - guc->fw.path = i915_modparams.guc_firmware_path; - guc->fw.major_ver_wanted = 0; - guc->fw.minor_ver_wanted = 0; + guc_fw->path = i915_modparams.guc_firmware_path; + guc_fw->major_ver_wanted = 0; + guc_fw->minor_ver_wanted = 0; } else if (IS_SKYLAKE(dev_priv)) { - guc->fw.path = I915_SKL_GUC_UCODE; - guc->fw.major_ver_wanted = SKL_FW_MAJOR; - guc->fw.minor_ver_wanted = SKL_FW_MINOR; + guc_fw->path = I915_SKL_GUC_UCODE; + guc_fw->major_ver_wanted = SKL_FW_MAJOR; + guc_fw->minor_ver_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - guc->fw.path = I915_BXT_GUC_UCODE; - guc->fw.major_ver_wanted = BXT_FW_MAJOR; - guc->fw.minor_ver_wanted = BXT_FW_MINOR; + guc_fw->path = I915_BXT_GUC_UCODE; + guc_fw->major_ver_wanted = BXT_FW_MAJOR; + guc_fw->minor_ver_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - guc->fw.path = I915_KBL_GUC_UCODE; - guc->fw.major_ver_wanted = KBL_FW_MAJOR; - guc->fw.minor_ver_wanted = KBL_FW_MINOR; + guc_fw->path = I915_KBL_GUC_UCODE; + guc_fw->major_ver_wanted = KBL_FW_MAJOR; + guc_fw->minor_ver_wanted = KBL_FW_MINOR; } else if (IS_GEMINILAKE(dev_priv)) { - guc->fw.path = I915_GLK_GUC_UCODE; - guc->fw.major_ver_wanted = GLK_FW_MAJOR; - guc->fw.minor_ver_wanted = GLK_FW_MINOR; + guc_fw->path = I915_GLK_GUC_UCODE; + guc_fw->major_ver_wanted = GLK_FW_MAJOR; + guc_fw->minor_ver_wanted = GLK_FW_MINOR; } else { - DRM_ERROR("No GuC firmware known for platform with GuC!\n"); - return -ENOENT; + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(guc_fw->type)); } +} - return 0; +/** + * intel_guc_fw_init_early() - initializes GuC firmware struct + * @guc: intel_guc struct + * + * On platforms with GuC selects firmware for uploading + */ +void intel_guc_fw_init_early(struct intel_guc *guc) +{ + struct intel_uc_fw *guc_fw = &guc->fw; + + intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + guc_fw_select(guc_fw); } static void guc_prepare_xfer(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h index 023f5baa9dd6..4ec5d3d9e2b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.h +++ b/drivers/gpu/drm/i915/intel_guc_fw.h @@ -27,7 +27,7 @@ struct intel_guc; -int intel_guc_fw_select(struct intel_guc *guc); +void intel_guc_fw_init_early(struct intel_guc *guc); int intel_guc_fw_upload(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 95b524c9f8f3..4b7f2a78b378 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -65,7 +65,7 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) /* Verify firmware version */ if (i915_modparams.enable_guc_loading) { - if (intel_guc_fw_select(&dev_priv->guc)) + if (!intel_uc_fw_is_selected(&dev_priv->guc.fw)) i915_modparams.enable_guc_loading = 0; } diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 5394d9d1e683..d5fd4609c785 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -110,6 +110,11 @@ void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) uc_fw->type = type; } +static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) +{ + return uc_fw->path != NULL; +} + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, From 93ffbe8e9dc744f25cbdb941543030ab8de1d8f5 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:12 +0000 Subject: [PATCH 26/87] drm/i915/guc: Introduce USES_GUC_xxx helper macros In the upcoming patch we will change the way how to recognize when GuC is in use. Using helper macros will minimize scope of that changes. While here, update dev_info message. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_guc.c | 2 +- drivers/gpu/drm/i915/intel_guc_log.c | 6 +++--- drivers/gpu/drm/i915/intel_gvt.c | 2 +- drivers/gpu/drm/i915/intel_uc.c | 23 +++++++++++----------- drivers/gpu/drm/i915/selftests/intel_guc.c | 2 +- 9 files changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd4eea51c9ed..937fa02300e3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3239,6 +3239,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_HUC(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) +/* Having a GuC is not the same as using a GuC */ +#define USES_GUC(dev_priv) (i915_modparams.enable_guc_loading) +#define USES_GUC_SUBMISSION(dev_priv) (i915_modparams.enable_guc_submission) + #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) #define HAS_POOLED_EU(dev_priv) ((dev_priv)->info.has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ce3139e5ec4c..21ce374d9924 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -316,7 +316,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, * present or not in use we still need a small bias as ring wraparound * at offset 0 sometimes hangs. No idea why. */ - if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) + if (USES_GUC(dev_priv)) ctx->ggtt_offset_bias = GUC_WOPCM_TOP; else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; @@ -409,7 +409,7 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(to_i915(dev))) ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 41a203e0c160..86fbd67388f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3503,7 +3503,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) * currently don't have any bits spare to pass in this upper * restriction! */ - if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) { + if (USES_GUC(dev_priv)) { ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7cac07db89b9..3517c6548e2c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1400,7 +1400,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { notify_ring(engine); - tasklet |= i915_modparams.enable_guc_submission; + tasklet |= USES_GUC_SUBMISSION(engine->i915); } if (tasklet) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index df8690738369..177ee69ca9b1 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -129,7 +129,7 @@ void intel_guc_init_params(struct intel_guc *guc) } /* If GuC submission is enabled, set up additional parameters here */ - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 76d3eb1e4614..1a2c5eed9929 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -505,7 +505,7 @@ static void guc_flush_logs(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - if (!i915_modparams.enable_guc_submission || + if (!USES_GUC_SUBMISSION(dev_priv) || (i915_modparams.guc_log_level < 0)) return; @@ -646,7 +646,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!i915_modparams.enable_guc_submission || + if (!USES_GUC_SUBMISSION(dev_priv) || (i915_modparams.guc_log_level < 0)) return; @@ -657,7 +657,7 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv) void i915_guc_log_unregister(struct drm_i915_private *dev_priv) { - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(dev_priv)) return; mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 126f7c769c69..a2fe7c8d4477 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -95,7 +95,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; } - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); return -EIO; } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 4b7f2a78b378..ed2dd764fed7 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -152,7 +152,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; int ret, attempts; - if (!i915_modparams.enable_guc_loading) + if (!USES_GUC(dev_priv)) return 0; guc_disable_communication(guc); @@ -161,7 +161,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later @@ -211,7 +211,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_log_capture; intel_huc_auth(&dev_priv->huc); - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); @@ -220,11 +220,10 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_interrupts; } - dev_info(dev_priv->drm.dev, "GuC %s (firmware %s [version %u.%u])\n", - i915_modparams.enable_guc_submission ? "submission enabled" : - "loaded", - guc->fw.path, + dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n", guc->fw.major_ver_found, guc->fw.minor_ver_found); + dev_info(dev_priv->drm.dev, "GuC submission %s\n", + enableddisabled(USES_GUC_SUBMISSION(dev_priv))); return 0; @@ -243,7 +242,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) err_log_capture: guc_capture_load_err_log(guc); err_submission: - if (i915_modparams.enable_guc_submission) + if (USES_GUC_SUBMISSION(dev_priv)) intel_guc_submission_fini(guc); err_guc: i915_ggtt_disable_guc(dev_priv); @@ -257,7 +256,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) ret = 0; } - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { i915_modparams.enable_guc_submission = 0; DRM_NOTE("Falling back from GuC submission to execlist mode\n"); } @@ -273,15 +272,15 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) guc_free_load_err_log(guc); - if (!i915_modparams.enable_guc_loading) + if (!USES_GUC(dev_priv)) return; - if (i915_modparams.enable_guc_submission) + if (USES_GUC_SUBMISSION(dev_priv)) intel_guc_submission_disable(guc); guc_disable_communication(guc); - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { gen9_disable_guc_interrupts(dev_priv); intel_guc_submission_fini(guc); } diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 7b23597858bb..68d6a69c738f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -362,7 +362,7 @@ int intel_guc_live_selftest(struct drm_i915_private *dev_priv) SUBTEST(igt_guc_doorbells), }; - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(dev_priv)) return 0; return i915_subtests(tests, dev_priv); From a655aeb34f7cae135a31a8a643314061ca6737e3 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:13 +0000 Subject: [PATCH 27/87] drm/i915/uc: Don't fetch GuC firmware if no plan to use GuC If we don't plan to use GuC then we should not try to fetch GuC and HuC firmwares. We can save memory and avoid possible dmesg noise. Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index ed2dd764fed7..c3981aa4fb28 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -86,12 +86,18 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv) void intel_uc_init_fw(struct drm_i915_private *dev_priv) { + if (!USES_GUC(dev_priv)) + return; + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { + if (!USES_GUC(dev_priv)) + return; + intel_uc_fw_fini(&dev_priv->guc.fw); intel_uc_fw_fini(&dev_priv->huc.fw); } From 8620eb1dbbf287694ee8e0cd280fadedb1f91012 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:14 +0000 Subject: [PATCH 28/87] drm/i915/uc: Don't use -EIO to report missing firmware -EIO has special meaning and is used when we want to allow engine initialization to fail and mark GPU as wedged. However here at this function we should return error code that corresponds to upload status only, as any decision how to handle missing firmware should be done higher level function (silent fallback to non-GuC mode, fail into wedged mode, or abort driver load with fatal error). v2: commit message update (Michal) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_uc_fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index b376dd3b28cc..784eff9cdfc8 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -214,7 +214,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -EIO; + return -ENOEXEC; uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("%s fw load %s\n", From 121981fafe699d9f398a3c717912ef4eae6719b1 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:15 +0000 Subject: [PATCH 29/87] drm/i915/guc: Combine enable_guc_loading|submission modparams We currently have two module parameters that control GuC: "enable_guc_loading" and "enable_guc_submission". Whenever we need submission=1, we also need loading=1. We also need loading=1 when we want to want to load and verify the HuC. Lets combine above module parameters into one "enable_guc" modparam. New supported bit values are: 0=disable GuC (no GuC submission, no HuC) 1=enable GuC submission 2=enable HuC load Special value "-1" can be used to let driver decide what option should be enabled for given platform based on hardware/firmware availability or preference. Explicit enabling any of the GuC features makes GuC load a required step, fallback to non-GuC mode will not be supported. v2: Don't use -EIO v3: define modparam bits (Chris) v4: rely on implicit cast (Chris) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Cc: Sujaritha Sundaresan Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 +- drivers/gpu/drm/i915/i915_params.c | 11 ++- drivers/gpu/drm/i915/i915_params.h | 7 +- drivers/gpu/drm/i915/intel_uc.c | 107 +++++++++++++++++------------ drivers/gpu/drm/i915/intel_uc.h | 19 +++++ 5 files changed, 95 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 937fa02300e3..02551c781f0a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3240,8 +3240,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) /* Having a GuC is not the same as using a GuC */ -#define USES_GUC(dev_priv) (i915_modparams.enable_guc_loading) -#define USES_GUC_SUBMISSION(dev_priv) (i915_modparams.enable_guc_submission) +#define USES_GUC(dev_priv) intel_uc_is_using_guc() +#define USES_GUC_SUBMISSION(dev_priv) intel_uc_is_using_guc_submission() +#define USES_HUC(dev_priv) intel_uc_is_using_huc() #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7bc538687871..8dfea0320c2f 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -147,13 +147,10 @@ i915_param_named_unsafe(edp_vswing, int, 0400, "(0=use value from vbt [default], 1=low power swing(200mV)," "2=default swing(400mV))"); -i915_param_named_unsafe(enable_guc_loading, int, 0400, - "Enable GuC firmware loading " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); - -i915_param_named_unsafe(enable_guc_submission, int, 0400, - "Enable GuC submission " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); +i915_param_named_unsafe(enable_guc, int, 0400, + "Enable GuC load for GuC submission and/or HuC load. " + "Required functionality can be selected using bitmask values. " + "(-1=auto, 0=disable [default], 1=GuC submission, 2=HuC load)"); i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level (-1:disabled (default), 0-3:enabled)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c48c88bb95e8..792ce26d7449 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -25,8 +25,12 @@ #ifndef _I915_PARAMS_H_ #define _I915_PARAMS_H_ +#include #include /* for __read_mostly */ +#define ENABLE_GUC_SUBMISSION BIT(0) +#define ENABLE_GUC_LOAD_HUC BIT(1) + #define I915_PARAMS_FOR_EACH(param) \ param(char *, vbt_firmware, NULL) \ param(int, modeset, -1) \ @@ -41,8 +45,7 @@ param(int, disable_power_well, -1) \ param(int, enable_ips, 1) \ param(int, invert_brightness, 0) \ - param(int, enable_guc_loading, 0) \ - param(int, enable_guc_submission, 0) \ + param(int, enable_guc, 0) \ param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index c3981aa4fb28..7dfc7e07982f 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -47,35 +47,65 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) return ret; } +static int __get_platform_enable_guc(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + int enable_guc = 0; + + /* Default is to enable GuC/HuC if we know their firmwares */ + if (intel_uc_fw_is_selected(guc_fw)) + enable_guc |= ENABLE_GUC_SUBMISSION; + if (intel_uc_fw_is_selected(huc_fw)) + enable_guc |= ENABLE_GUC_LOAD_HUC; + + /* Any platform specific fine-tuning can be done here */ + + return enable_guc; +} + +/** + * intel_uc_sanitize_options - sanitize uC related modparam options + * @dev_priv: device private + * + * In case of "enable_guc" option this function will attempt to modify + * it only if it was initially set to "auto(-1)". Default value for this + * modparam varies between platforms and it is hardcoded in driver code. + * Any other modparam value is only monitored against availability of the + * related hardware or firmware definitions. + */ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) { - if (!HAS_GUC(dev_priv)) { - if (i915_modparams.enable_guc_loading > 0 || - i915_modparams.enable_guc_submission > 0) - DRM_INFO("Ignoring GuC options, no hardware\n"); - - i915_modparams.enable_guc_loading = 0; - i915_modparams.enable_guc_submission = 0; - return; - } + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; /* A negative value means "use platform default" */ - if (i915_modparams.enable_guc_loading < 0) - i915_modparams.enable_guc_loading = HAS_GUC_UCODE(dev_priv); + if (i915_modparams.enable_guc < 0) + i915_modparams.enable_guc = __get_platform_enable_guc(dev_priv); - /* Verify firmware version */ - if (i915_modparams.enable_guc_loading) { - if (!intel_uc_fw_is_selected(&dev_priv->guc.fw)) - i915_modparams.enable_guc_loading = 0; + DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_is_using_guc_submission()), + yesno(intel_uc_is_using_huc())); + + /* Verify GuC firmware availability */ + if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_GUC(dev_priv) ? "no GuC hardware" : + "no GuC firmware"); } - /* Can't enable guc submission without guc loaded */ - if (!i915_modparams.enable_guc_loading) - i915_modparams.enable_guc_submission = 0; + /* Verify HuC firmware availability */ + if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_HUC(dev_priv) ? "no HuC hardware" : + "no HuC firmware"); + } - /* A negative value means "use platform default" */ - if (i915_modparams.enable_guc_submission < 0) - i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); + /* Make sure that sanitization was done */ + GEM_BUG_ON(i915_modparams.enable_guc < 0); } void intel_uc_init_early(struct drm_i915_private *dev_priv) @@ -161,6 +191,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return 0; + if (!HAS_GUC(dev_priv)) { + ret = -ENODEV; + goto err_out; + } + guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); @@ -235,12 +270,6 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* * We've failed to load the firmware :( - * - * Decide whether to disable GuC submission and fall back to - * execlist mode, and whether to hide the error by returning - * zero or to return -EIO, which the caller will treat as a - * nonfatal error (i.e. it doesn't prevent driver load, but - * marks the GPU as wedged until reset). */ err_interrupts: guc_disable_communication(guc); @@ -252,23 +281,15 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) intel_guc_submission_fini(guc); err_guc: i915_ggtt_disable_guc(dev_priv); +err_out: + /* + * Note that there is no fallback as either user explicitly asked for + * the GuC or driver default option was to run with the GuC enabled. + */ + if (GEM_WARN_ON(ret == -EIO)) + ret = -EINVAL; - if (i915_modparams.enable_guc_loading > 1 || - i915_modparams.enable_guc_submission > 1) { - DRM_ERROR("GuC init failed. Firmware loading disabled.\n"); - ret = -EIO; - } else { - DRM_NOTE("GuC init failed. Firmware loading disabled.\n"); - ret = 0; - } - - if (USES_GUC_SUBMISSION(dev_priv)) { - i915_modparams.enable_guc_submission = 0; - DRM_NOTE("Falling back from GuC submission to execlist mode\n"); - } - - i915_modparams.enable_guc_loading = 0; - + dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret); return ret; } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index e18d3bb02088..7a59e2486e9e 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -26,6 +26,7 @@ #include "intel_guc.h" #include "intel_huc.h" +#include "i915_params.h" void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); @@ -35,4 +36,22 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); +static inline bool intel_uc_is_using_guc(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc > 0; +} + +static inline bool intel_uc_is_using_guc_submission(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; +} + +static inline bool intel_uc_is_using_huc(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC; +} + #endif From 0dfa1cee613e03cee295b8d1ed8130c84311b584 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 6 Dec 2017 13:53:16 +0000 Subject: [PATCH 30/87] drm/i915/huc: Load HuC only if requested Our new "enable_guc" modparam allows to control whenever HuC should be loaded. However existing code will try load and authenticate HuC always when we use the GuC. This patch is trying to enforce modparam selection. v2: no need to cast PTR_ERR (Chris) fetch/fini only if required (Michal) fix wrong break (Sagar) v3: add new goto label (Sagar) Signed-off-by: Michal Wajdeczko Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171206135316.32556-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 21 +++++++++++---------- drivers/gpu/drm/i915/intel_huc.h | 4 ++-- drivers/gpu/drm/i915/intel_uc.c | 28 +++++++++++++++++++++++----- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 6d0e050ab7d9..974be3defa70 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -181,17 +181,17 @@ static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) * intel_huc_init_hw() - load HuC uCode to device * @huc: intel_huc structure * - * Called from guc_setup() during driver loading and also after a GPU reset. - * Be note that HuC loading must be done before GuC loading. + * Called from intel_uc_init_hw() during driver loading and also after a GPU + * reset. Be note that HuC loading must be done before GuC loading. * * The firmware image should have already been fetched into memory by the - * earlier call to intel_huc_init(), so here we need only check that + * earlier call to intel_uc_init_fw(), so here we need only check that * is succeeded, and then transfer the image to the h/w. * */ -void intel_huc_init_hw(struct intel_huc *huc) +int intel_huc_init_hw(struct intel_huc *huc) { - intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); + return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); } /** @@ -205,7 +205,7 @@ void intel_huc_init_hw(struct intel_huc *huc) * signature through intel_guc_auth_huc(). It then waits for 50ms for * firmware verification ACK and unpins the object. */ -void intel_huc_auth(struct intel_huc *huc) +int intel_huc_auth(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; @@ -213,14 +213,14 @@ void intel_huc_auth(struct intel_huc *huc) int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return; + return -ENOEXEC; vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { - DRM_ERROR("failed to pin huc fw object %d\n", - (int)PTR_ERR(vma)); - return; + ret = PTR_ERR(vma); + DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); + return ret; } ret = intel_guc_auth_huc(guc, @@ -243,4 +243,5 @@ void intel_huc_auth(struct intel_huc *huc) out: i915_vma_unpin(vma); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 3d757bce9ce3..40039db59e04 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -35,7 +35,7 @@ struct intel_huc { }; void intel_huc_init_early(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_huc_auth(struct intel_huc *huc); +int intel_huc_init_hw(struct intel_huc *huc); +int intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 7dfc7e07982f..461047c86e0d 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -119,7 +119,9 @@ void intel_uc_init_fw(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return; - intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + if (USES_HUC(dev_priv)) + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } @@ -129,7 +131,9 @@ void intel_uc_fini_fw(struct drm_i915_private *dev_priv) return; intel_uc_fw_fini(&dev_priv->guc.fw); - intel_uc_fw_fini(&dev_priv->huc.fw); + + if (USES_HUC(dev_priv)) + intel_uc_fw_fini(&dev_priv->huc.fw); } /** @@ -186,6 +190,7 @@ static void guc_disable_communication(struct intel_guc *guc) int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + struct intel_huc *huc = &dev_priv->huc; int ret, attempts; if (!USES_GUC(dev_priv)) @@ -233,7 +238,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_submission; - intel_huc_init_hw(&dev_priv->huc); + if (USES_HUC(dev_priv)) { + ret = intel_huc_init_hw(huc); + if (ret) + goto err_submission; + } + intel_guc_init_params(guc); ret = intel_guc_fw_upload(guc); if (ret == 0 || ret != -EAGAIN) @@ -251,7 +261,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_log_capture; - intel_huc_auth(&dev_priv->huc); + if (USES_HUC(dev_priv)) { + ret = intel_huc_auth(huc); + if (ret) + goto err_communication; + } + if (USES_GUC_SUBMISSION(dev_priv)) { if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); @@ -265,6 +280,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) guc->fw.major_ver_found, guc->fw.minor_ver_found); dev_info(dev_priv->drm.dev, "GuC submission %s\n", enableddisabled(USES_GUC_SUBMISSION(dev_priv))); + dev_info(dev_priv->drm.dev, "HuC %s\n", + enableddisabled(USES_HUC(dev_priv))); return 0; @@ -272,8 +289,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) * We've failed to load the firmware :( */ err_interrupts: - guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_communication: + guc_disable_communication(guc); err_log_capture: guc_capture_load_err_log(guc); err_submission: From 010e3e68cd9cb65ea50c0af605e966cda333cb2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Dec 2017 12:49:13 +0000 Subject: [PATCH 31/87] drm/i915: Remove vma from object on destroy, not close Originally we translated from the object to the vma by walking obj->vma_list to find the matching vm (for user lookups). Now we process user lookups using the rbtree, and we only use obj->vma_list itself for maintaining state (e.g. ensuring that all vma are flushed or rebound). As such maintenance needs to go on beyond the user's awareness of the vma, defer removal of the vma from the obj->vma_list from i915_vma_close() to i915_vma_destroy() Fixes: 5888fc9eac3c ("drm/i915: Flush pending GTT writes before unbinding") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104155 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171206124914.19960-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_vma.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 80b78fb5daac..5504be753092 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3746,7 +3746,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (i915_gem_valid_gtt_space(vma, cache_level)) + if (!i915_vma_is_closed(vma) && + i915_gem_valid_gtt_space(vma, cache_level)) continue; ret = i915_vma_unbind(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bf6d8d1eaabe..1013403fcfea 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -466,6 +466,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) u64 start, end; int ret; + GEM_BUG_ON(i915_vma_is_closed(vma)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); @@ -678,7 +679,9 @@ static void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i])); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + list_del(&vma->obj_link); list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -690,7 +693,6 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) From 7125397b82460d74ae0584bdcdc006deec5e895d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Dec 2017 12:49:14 +0000 Subject: [PATCH 32/87] drm/i915: Track GGTT writes on the vma As writes through the GTT and GGTT PTE updates do not share the same path, they are not strictly ordered and so we must explicitly flush the indirect writes prior to modifying the PTE. We do track outstanding GGTT writes on the object itself, but since the object may have multiple GGTT vma, that is overly coarse as we can track and flush individual vma as required. Whilst here, update the GGTT flushing behaviour for Cannonlake. v2: Hard-code ring offset to allow use during unload (after RCS may have been freed, or never existed!) References: https://bugs.freedesktop.org/show_bug.cgi?id=104002 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171206124914.19960-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 58 +++++++++++++++++++++++---------- drivers/gpu/drm/i915/i915_vma.c | 22 +++++++++++++ drivers/gpu/drm/i915/i915_vma.h | 19 +++++++++++ 4 files changed, 83 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 02551c781f0a..f9386e793c87 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3887,6 +3887,8 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv); + /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5504be753092..67dc11effc8e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -666,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } -static void -flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (!(obj->base.write_domain & flush_domains)) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to @@ -687,22 +683,46 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) * timing. This issue has only been observed when switching quickly * between GTT writes and CPU reads from inside the kernel on recent hw, * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). */ + wmb(); + intel_runtime_pm_get(dev_priv); + spin_lock_irq(&dev_priv->uncore.lock); + + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + + if (!(obj->base.write_domain & flush_domains)) + return; + switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (!HAS_LLC(dev_priv)) { - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } + i915_gem_flush_ggtt_writes(dev_priv); intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + if (vma->iomap) + continue; + + i915_vma_unset_ggtt_write(vma); + } break; case I915_GEM_DOMAIN_CPU: @@ -1965,6 +1985,8 @@ int i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); GEM_BUG_ON(!obj->userfault_count); + i915_vma_set_ggtt_write(vma); + err_fence: i915_vma_unpin_fence(vma); err_unpin: diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 1013403fcfea..0ebd75693505 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -322,6 +322,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) if (err) goto err_unpin; + i915_vma_set_ggtt_write(vma); return ptr; err_unpin: @@ -330,12 +331,24 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) return IO_ERR_PTR(err); } +void i915_vma_flush_writes(struct i915_vma *vma) +{ + if (!i915_vma_has_ggtt_write(vma)) + return; + + i915_gem_flush_ggtt_writes(vma->vm->i915); + + i915_vma_unset_ggtt_write(vma); +} + void i915_vma_unpin_iomap(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); GEM_BUG_ON(vma->iomap == NULL); + i915_vma_flush_writes(vma); + i915_vma_unpin_fence(vma); i915_vma_unpin(vma); } @@ -792,6 +805,15 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (i915_vma_is_map_and_fenceable(vma)) { + /* + * Check that we have flushed all writes through the GGTT + * before the unbind, other due to non-strict nature of those + * indirect writes they may end up referencing the GGTT PTE + * after the unbind. + */ + i915_vma_flush_writes(vma); + GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); + /* release the fence reg _after_ flushing */ ret = i915_vma_put_fence(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 1e2bc9b3c3ac..f636243eb8f7 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -90,6 +90,7 @@ struct i915_vma { #define I915_VMA_CLOSED BIT(10) #define I915_VMA_USERFAULT_BIT 11 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) +#define I915_VMA_GGTT_WRITE BIT(12) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -138,6 +139,24 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) return vma->flags & I915_VMA_GGTT; } +static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + vma->flags |= I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +{ + vma->flags &= ~I915_VMA_GGTT_WRITE; +} + +void i915_vma_flush_writes(struct i915_vma *vma); + static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { return vma->flags & I915_VMA_CAN_FENCE; From e2189dd078a2b83d370ae9cde459e5c6711936de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Dec 2017 21:14:07 +0000 Subject: [PATCH 33/87] drm/i915: Refactor common list iteration over GGTT vma In quite a few places, we have a list iteration over the vma on an object that only want to inspect GGTT vma. By construction, these are placed at the start of the list, so we have copied that knowledge into many callsites. Pull that knowledge back to i915_vma.h and provide a for_each_ggtt_vma() to tidy up the code. v2: Add a backreference from vma_create() to remind ourselves why we put ggtt vma at the head of the obj->vma_list (and ppgtt vma at the tail). v3: Fixup s/vma/V/ Suggested-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171207211407.31549-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 18 ++++-------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +---- drivers/gpu/drm/i915/i915_gem_tiling.c | 10 ++-------- drivers/gpu/drm/i915/i915_vma.c | 6 ++++++ drivers/gpu/drm/i915/i915_vma.h | 16 +++++++++++++++- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 28294470ae31..7b41a1799a03 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -111,8 +111,8 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) u64 size = 0; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) + for_each_ggtt_vma(vma, obj) { + if (drm_mm_node_allocated(&vma->node)) size += vma->node.size; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 67dc11effc8e..c7b5db78fbb4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -714,10 +714,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (vma->iomap) continue; @@ -1569,10 +1566,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (i915_vma_is_active(vma)) continue; @@ -2051,12 +2045,8 @@ static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) i915_vma_unset_userfault(vma); - } } /** @@ -3822,7 +3812,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * dropped the fence as all snoopable access is * supposed to be linear. */ - list_for_each_entry(vma, &obj->vma_list, obj_link) { + for_each_ggtt_vma(vma, obj) { ret = i915_vma_put_fence(vma); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 86fbd67388f3..c4bb02ecd217 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3620,10 +3620,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) bool ggtt_bound = false; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->vm != &ggtt->base) - continue; - + for_each_ggtt_vma(vma, obj) { if (!i915_vma_unbind(vma)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index b85d7ebd9bee..d9dc9df523b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -205,10 +205,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, if (tiling_mode == I915_TILING_NONE) return 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; @@ -285,10 +282,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } mutex_unlock(&obj->mm.lock); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { vma->fence_size = i915_gem_fence_size(i915, vma->size, tiling, stride); vma->fence_alignment = diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0ebd75693505..92c11e70fea4 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -142,6 +142,12 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index f636243eb8f7..fd5b84904f7c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -408,5 +408,19 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } -#endif +#define for_each_until(cond) if (cond) break; else +/** + * for_each_ggtt_vma - Iterate over the GGTT VMA belonging to an object. + * @V: the #i915_vma iterator + * @OBJ: the #drm_i915_gem_object + * + * GGTT VMA are placed at the being of the object's vma_list, see + * vma_create(), so we can stop our walk as soon as we see a ppgtt VMA, + * or the list is empty ofc. + */ +#define for_each_ggtt_vma(V, OBJ) \ + list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + for_each_until(!i915_vma_is_ggtt(V)) + +#endif From 2fc7a06ad518610d197bafe7d2d3e8a8f9bb181e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Dec 2017 22:24:34 +0000 Subject: [PATCH 34/87] drm/i915/execlists: Cache ELSP register offset Currently on every submission, we recalculate the ELSP register offset for the engine, after chasing the pointers to find the iomem base. Since this is fixed for the lifetime of the driver, record the offset in the execlists struct. In practice the difference is negligible, it just happens to remove 27 bytes of eyesore pointer dancing from next to the hottest instruction (which is itself due to stalling for a cache miss) in perf profiles of the execlists_submission_tasklet(). v2: Trim off one more elsp local. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Michel Thierry Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171207222434.17686-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 13 ++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2a8160f603ab..2e38fbfdf08f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -431,8 +431,6 @@ static inline void elsp_write(u64 desc, u32 __iomem *elsp) static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; - u32 __iomem *elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); unsigned int n; for (n = execlists_num_ports(&engine->execlists); n--; ) { @@ -458,7 +456,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = 0; } - elsp_write(desc, elsp); + elsp_write(desc, engine->execlists.elsp); } execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } @@ -496,8 +494,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; - u32 __iomem *elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); unsigned int n; GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); @@ -510,9 +506,9 @@ static void inject_preempt_context(struct intel_engine_cs *engine) GEM_TRACE("\n"); for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, elsp); + elsp_write(0, engine->execlists.elsp); - elsp_write(ce->lrc_desc, elsp); + elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } @@ -1509,6 +1505,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) execlists->csb_head = -1; execlists->active = 0; + execlists->elsp = + dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + /* After a GPU reset, we may have requests to replay */ if (execlists->first) tasklet_schedule(&execlists->tasklet); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3d097bbeb2d9..21d6ed67a813 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -198,6 +198,11 @@ struct intel_engine_execlists { */ bool no_priolist; + /** + * @elsp: the ExecList Submission Port register + */ + u32 __iomem *elsp; + /** * @port: execlist port states * From 4447f423ff0822f3eacc31bbaf445226fa312b84 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 8 Dec 2017 14:56:20 +0800 Subject: [PATCH 35/87] drm/i915/gvt: Refine the ring mmio list definition To improve the readability, let's remove the hard code for each mmio definition. The raw offset remained as a comment, which give us an offset based view. This refine is to make it convenient for new platform enabling. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 180 ++++++++++++++++-------------- 1 file changed, 97 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 0672178548ef..43abca5dbe75 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -45,100 +45,114 @@ struct render_mmio { u32 value; }; -static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, _MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, +/** + * Defined in Intel Open Source PRM. + * Ref: https://01.org/linuxgraphics/documentation/hardware-specification-prms + */ +#define TRVATTL3PTRDW(i) _MMIO(0x4de0 + (i)*4) +#define TRNULLDETCT _MMIO(0x4de8) +#define TRINVTILEDETCT _MMIO(0x4dec) +#define TRVADR _MMIO(0x4df0) +#define TRTTE _MMIO(0x4df4) +#define RING_EXCC(base) _MMIO((base) + 0x28) +#define RING_GFX_MODE(base) _MMIO((base) + 0x29c) +#define VF_GUARDBAND _MMIO(0x83a4) - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, +/* Raw offset is appened to each line for convenience. */ +static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ }; static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, _MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ - {RCS, _MMIO(0x40e0), 0, false}, - {RCS, _MMIO(0x40e4), 0, false}, - {RCS, _MMIO(0x2580), 0xffff, true}, - {RCS, _MMIO(0x7014), 0xffff, true}, - {RCS, _MMIO(0x20ec), 0xffff, false}, - {RCS, _MMIO(0xb118), 0, false}, - {RCS, _MMIO(0xe100), 0xffff, true}, - {RCS, _MMIO(0xe180), 0xffff, true}, - {RCS, _MMIO(0xe184), 0xffff, true}, - {RCS, _MMIO(0xe188), 0xffff, true}, - {RCS, _MMIO(0xe194), 0xffff, true}, - {RCS, _MMIO(0x4de0), 0, false}, - {RCS, _MMIO(0x4de4), 0, false}, - {RCS, _MMIO(0x4de8), 0, false}, - {RCS, _MMIO(0x4dec), 0, false}, - {RCS, _MMIO(0x4df0), 0, false}, - {RCS, _MMIO(0x4df4), 0, false}, + {RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ + {RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ + {RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ + {RCS, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ + {RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ + {RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ + {RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ + {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ + {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ + {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ + {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ + {RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */ + {RCS, TRVADR, 0, false}, /* 0x4df0 */ + {RCS, TRTTE, 0, false}, /* 0x4df4 */ - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ - {VCS2, _MMIO(0x1c028), 0xffff, false}, + {VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ - {VECS, _MMIO(0x1a028), 0xffff, false}, + {VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ - {RCS, _MMIO(0x7304), 0xffff, true}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x940c), 0x0, false}, - {RCS, _MMIO(0x4ab8), 0x0, false}, + {RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ + {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ - {RCS, _MMIO(0x4ab0), 0x0, false}, - {RCS, _MMIO(0x20d4), 0x0, false}, + {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ + {RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */ - {RCS, _MMIO(0xb004), 0x0, false}, - {RCS, _MMIO(0x20a0), 0x0, false}, - {RCS, _MMIO(0x20e4), 0xffff, false}, + {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ + {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ + {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ }; static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; From 83164886e4559f87015a33780852a64cdd6e4e50 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 8 Dec 2017 14:56:21 +0800 Subject: [PATCH 36/87] drm/i915/gvt: Select appropriate mmio list at initialization time Select appropriate mmio list at initialization time, so we don't need to do duplicated work at where requires the mmio list. V2: - Add a termination mark of mmio list. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 2 ++ drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/render.c | 60 ++++++++++++++----------------- drivers/gpu/drm/i915/gvt/render.h | 9 +++++ 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 9a5dce3aa10a..643bb961d40d 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -386,6 +386,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_idr; + intel_gvt_init_engine_mmio_context(gvt); + ret = intel_gvt_load_firmware(gvt); if (ret) goto out_clean_mmio_info; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 77df9bad5dea..39c2f3a4588e 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -310,6 +310,8 @@ struct intel_gvt { wait_queue_head_t service_thread_wq; unsigned long service_request; + struct engine_mmio *engine_mmio_list; + struct dentry *debugfs_root; }; diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 43abca5dbe75..3e675f81815f 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -37,14 +37,6 @@ #include "gvt.h" #include "trace.h" -struct render_mmio { - int ring_id; - i915_reg_t reg; - u32 mask; - bool in_context; - u32 value; -}; - /** * Defined in Intel Open Source PRM. * Ref: https://01.org/linuxgraphics/documentation/hardware-specification-prms @@ -59,7 +51,7 @@ struct render_mmio { #define VF_GUARDBAND _MMIO(0x83a4) /* Raw offset is appened to each line for convenience. */ -static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { +static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ @@ -88,9 +80,10 @@ static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + { /* Terminated */ } }; -static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { +static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ @@ -153,6 +146,7 @@ static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ + { /* Terminated */ } }; static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; @@ -282,21 +276,14 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); i915_reg_t last_reg = _MMIO(0); - struct render_mmio *mmio; + struct engine_mmio *mmio; u32 v; - int i, array_size; - if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) load_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - for (i = 0; i < array_size; i++, mmio++) { + mmio = vgpu->gvt->engine_mmio_list; + while (i915_mmio_reg_offset((mmio++)->reg)) { if (mmio->ring_id != ring_id) continue; @@ -326,7 +313,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) } /* Make sure the swiched MMIOs has taken effect. */ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) + if (likely(i915_mmio_reg_offset(last_reg))) I915_READ_FW(last_reg); handle_tlb_pending_event(vgpu, ring_id); @@ -336,21 +323,15 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct render_mmio *mmio; i915_reg_t last_reg = _MMIO(0); + struct engine_mmio *mmio; u32 v; - int i, array_size; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) restore_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - for (i = 0; i < array_size; i++, mmio++) { + mmio = vgpu->gvt->engine_mmio_list; + while (i915_mmio_reg_offset((mmio++)->reg)) { if (mmio->ring_id != ring_id) continue; @@ -374,7 +355,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) } /* Make sure the swiched MMIOs has taken effect. */ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) + if (likely(i915_mmio_reg_offset(last_reg))) I915_READ_FW(last_reg); } @@ -419,3 +400,16 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); } + +/** + * intel_gvt_init_engine_mmio_context - Initiate the engine mmio list + * @gvt: GVT device + * + */ +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) +{ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + gvt->engine_mmio_list = gen9_engine_mmio_list; + else + gvt->engine_mmio_list = gen8_engine_mmio_list; +} diff --git a/drivers/gpu/drm/i915/gvt/render.h b/drivers/gpu/drm/i915/gvt/render.h index 91db1d39d28f..ca2c6a745673 100644 --- a/drivers/gpu/drm/i915/gvt/render.h +++ b/drivers/gpu/drm/i915/gvt/render.h @@ -36,8 +36,17 @@ #ifndef __GVT_RENDER_H__ #define __GVT_RENDER_H__ +struct engine_mmio { + int ring_id; + i915_reg_t reg; + u32 mask; + bool in_context; + u32 value; +}; + void intel_gvt_switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next, int ring_id); +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); #endif From cf27b950346d5201c291c4e189ed1436a136ae4d Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 8 Dec 2017 14:56:22 +0800 Subject: [PATCH 37/87] drm/i915/gvt: Remove MMIO barrier in MMIO switch After engine mmio switched, software still need write workload submission registers. So we can remove the MMIO barriar in MMIO switch. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 3e675f81815f..4c8e1285c607 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -275,7 +275,6 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - i915_reg_t last_reg = _MMIO(0); struct engine_mmio *mmio; u32 v; @@ -305,17 +304,12 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) v = vgpu_vreg(vgpu, mmio->reg); I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; trace_render_mmio(vgpu->id, "load", i915_mmio_reg_offset(mmio->reg), mmio->value, v); } - /* Make sure the swiched MMIOs has taken effect. */ - if (likely(i915_mmio_reg_offset(last_reg))) - I915_READ_FW(last_reg); - handle_tlb_pending_event(vgpu, ring_id); } @@ -323,7 +317,6 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t last_reg = _MMIO(0); struct engine_mmio *mmio; u32 v; @@ -347,16 +340,11 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) continue; I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; trace_render_mmio(vgpu->id, "restore", i915_mmio_reg_offset(mmio->reg), mmio->value, v); } - - /* Make sure the swiched MMIOs has taken effect. */ - if (likely(i915_mmio_reg_offset(last_reg))) - I915_READ_FW(last_reg); } /** From 1aec75ee327f2f2085a4e2b060a3d999b8f4d925 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 8 Dec 2017 14:56:23 +0800 Subject: [PATCH 38/87] drm/i915/gvt: Rename file render.{c, h} to mmio_context.{c, h} Rename the files to reflect their real role - to switch the mmio context of each vGPU engine. v2: update Makefile. Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/Makefile | 2 +- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/{render.c => mmio_context.c} | 0 drivers/gpu/drm/i915/gvt/{render.h => mmio_context.h} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename drivers/gpu/drm/i915/gvt/{render.c => mmio_context.c} (100%) rename drivers/gpu/drm/i915/gvt/{render.h => mmio_context.h} (100%) diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index cae06c1dcdcd..0ee9c6250e90 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -1,7 +1,7 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ - execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o \ + execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ fb_decoder.o dmabuf.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 39c2f3a4588e..b4747c270dcb 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -44,7 +44,7 @@ #include "execlist.h" #include "scheduler.h" #include "sched_policy.h" -#include "render.h" +#include "mmio_context.h" #include "cmd_parser.h" #include "fb_decoder.h" #include "dmabuf.h" diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/mmio_context.c similarity index 100% rename from drivers/gpu/drm/i915/gvt/render.c rename to drivers/gpu/drm/i915/gvt/mmio_context.c diff --git a/drivers/gpu/drm/i915/gvt/render.h b/drivers/gpu/drm/i915/gvt/mmio_context.h similarity index 100% rename from drivers/gpu/drm/i915/gvt/render.h rename to drivers/gpu/drm/i915/gvt/mmio_context.h From 072ec93d50fd4c4b27ea44fff160a95c2b581eb1 Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Fri, 8 Dec 2017 15:31:12 +0800 Subject: [PATCH 39/87] drm/i915/gvt/kvmgt: fill info for ROM/VGA region Both ROM/VGA region are not supported for vGPU in GVT. But if the device model want to get those region, we should return the correct information but not leave the structure with random data. Change to same operation of BAR3-BAR5 which are also not supported by vGPU. Refer to function @intel_vgpu_rw. Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index b8a85e08091a..f86983d6655b 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1029,13 +1029,17 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX: info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = 0; - info.flags = 0; + gvt_dbg_core("get region info bar:%d\n", info.index); break; case VFIO_PCI_ROM_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0; + info.flags = 0; + gvt_dbg_core("get region info index:%d\n", info.index); break; default: From 6ee942d5f7e3e630d3a2517e75969ce5d07c87d6 Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 8 Dec 2017 15:17:38 +0800 Subject: [PATCH 40/87] drm/i915/gvt: Refine dmabuf_obj cleanup process In the process of dmabuf_obj cleanup, the dmabuf_obj might be freed during dmabuf_obj_put leaking intel_gvt_hypervisor_put_vfio_device. Move intel_gvt_hypervisor_put_vfio_device and all the other dmabuf_obj ops in front of dmabuf_obj_put and let every dmabuf_obj have a chance to call intel_gvt_hypervisor_put_vfio_device to fix this leaking issue. Fixes: e3a0d7976c53 ("drm/i915/gvt: Handle orphan dmabuf_objs") Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 9c40a67ecdd6..2ab584f97dfb 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -520,19 +520,18 @@ void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) list_for_each_safe(pos, n, &vgpu->dmabuf_obj_list_head) { dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, list); + dmabuf_obj->vgpu = NULL; + + idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); + intel_gvt_hypervisor_put_vfio_device(vgpu); + list_del(pos); + + /* dmabuf_obj might be freed in dmabuf_obj_put */ if (dmabuf_obj->initref) { dmabuf_obj->initref = false; dmabuf_obj_put(dmabuf_obj); } - idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); - - if (dmabuf_obj->vgpu) - intel_gvt_hypervisor_put_vfio_device(vgpu); - - list_del(pos); - dmabuf_obj->vgpu = NULL; - } mutex_unlock(&vgpu->dmabuf_lock); } From b68763741aa29f2541c7ca58bcb0c2bb6cb5f449 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 5 Dec 2017 13:28:54 +0000 Subject: [PATCH 41/87] drm/i915: Restore GT performance in headless mode with DMC loaded It seems that the DMC likes to transition between the DC states a lot when there are no connected displays (no active power domains) during command submission. This activity on DC states has a negative impact on the performance of the chip with huge latencies observed in the interrupt handlers and elsewhere. Simple tests like igt/gem_latency -n 0 are slowed down by a factor of eight. Work around it by introducing a new power domain named, POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is held for the duration of command submission activity. CNL has the same problem which will be addressed as a follow-up. Doing that requires a fix for a DC6 context corruption problem in the CNL DMC firmware which is yet to be released. v2: * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson) * Protect macro body with braces. (Jani Nikula) v3: * Add dedicated power domain for clarity. (Chris, Imre) * Commit message and comment text updates. * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC firmware release. v4: * Power domain should be inner to device runtime pm. (Chris) * Simplify NEEDS_CSR_GT_PERF_WA macro. (Chris) * Handle async DMC loading by moving the GT_IRQ power domain logic into intel_runtime_pm. (Daniel, Chris) * Include small core GEN9 as well. (Imre) v5 * Special handling for async DMC load is not needed since on failure the power domain reference is kept permanently taken. (Imre) v6: * Drop the NEEDS_CSR_GT_PERF_WA macro since all firmwares have now been deployed. (Imre, Chris) Signed-off-by: Tvrtko Ursulin Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572 Testcase: igt/gem_exec_nop/headless Cc: Imre Deak Acked-by: Chris Wilson (v2) Cc: Chris Wilson Cc: Dmitry Rogozhkin Reviewed-by: Daniel Vetter (v5) Reviewed-by: Chris Wilson [Imre: Add note about applying the WA on CNL as a follow-up] Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20171205132854.26380-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_gem_request.c | 14 ++++++++++++++ drivers/gpu/drm/i915/intel_runtime_pm.c | 5 +++++ 4 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f9386e793c87..d57859cfad8e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -398,6 +398,7 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_D, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, + POWER_DOMAIN_GT_IRQ, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c7b5db78fbb4..fcc9b53864f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3386,6 +3386,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); + intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a90bdd26571f..c28a4ceb016d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = true; intel_enable_gt_powersave(i915); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8315499452dc..96ab74f3d101 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "INIT"; case POWER_DOMAIN_MODESET: return "MODESET"; + case POWER_DOMAIN_GT_IRQ: + return "GT_IRQ"; default: MISSING_CASE(domain); return "?"; @@ -1705,6 +1707,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1727,6 +1730,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1785,6 +1789,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) From 7a1530d7f91fe670b8033aadb1db4dae64512abb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Dec 2017 15:32:02 +0100 Subject: [PATCH 42/87] intel/atomic: Stop updating legacy fb parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even fbc isn't using this stuff anymore, so time to remove it. Cleaning up one small piece of the atomic conversion cruft at the time ... Quick explanation on why the plane->fb assignment is ok to delete: The core code takes care of the refcounting and legacy ->fb pointer updating, but drivers are allowed to update it ahead of time. Most legacy modeset drivers did that as part of their set_config callback (since that's how the legacy/crtc helpers worked). In i915 we only need that to make the fbc code happy. v2: don't nuke the assignement of intel_crtc->config, I accidentally set CI ablaze :-) Spotted by Maarten. And better explain why nuking the ->fb assignement shouldn't set off alarm bells. Cc: Paulo Zanoni Cc: Ville Syrjälä Cc: Maarten Lankhorst Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171207143202.6021-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/intel_display.c | 31 +++------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 47a2f6acee50..e355d780a4cd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10966,31 +10966,6 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, return ret; } -static void -intel_modeset_update_crtc_state(struct drm_atomic_state *state) -{ - struct drm_crtc *crtc; - struct drm_crtc_state *new_crtc_state; - int i; - - /* Double check state. */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); - - /* - * Update legacy state to satisfy fbc code. This can - * be removed when fbc uses the atomic state. - */ - if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { - struct drm_plane_state *plane_state = crtc->primary->state; - - crtc->primary->fb = plane_state->fb; - crtc->x = plane_state->src_x >> 16; - crtc->y = plane_state->src_y >> 16; - } - } -} - static bool intel_fuzzy_clock_check(int clock1, int clock2) { int diff; @@ -12363,9 +12338,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) } } - /* Only after disabling all output pipelines that will be changed can we - * update the the output configuration. */ - intel_modeset_update_crtc_state(state); + /* FIXME: Eventually get rid of our intel_crtc->config pointer */ + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) + to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); From 3e72be177cf19ab3d62b3084d424dce7e71d847f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Dec 2017 22:00:25 +0000 Subject: [PATCH 43/87] drm/i915: Drop fb reference on load_detect_pipe failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When intel_modeset_setup_plane_state() fails drop the local framebuffer reference before jumping to the error, otherwise we leak the framebuffer. Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Fixes: edde361711ef ("drm/i915: Use atomic state to obtain load detection crtc, v3.") Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171207220025.22698-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e355d780a4cd..be56f0150cd9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9939,11 +9939,10 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, } ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + drm_framebuffer_put(fb); if (ret) goto fail; - drm_framebuffer_put(fb); - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; From e8a70cab253cf4c54855ffc2b1997e501f03e2a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 01:22:59 +0000 Subject: [PATCH 44/87] drm/i915: Use snprintf to avoid line-break when pretty-printing engines When printing the execlist ports, we first print the ELSP header then follow it with the pretty-printed request. Since switching to drm_printer and show the output via printk, it automatically appends a newline to each call (unlike the old seq_printf output). To avoid the unwanted line break, construct the ELSP request header in a temporary buffer. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171208012303.25504-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 86d4c85c8725..dbd188a23cf1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1674,6 +1674,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *rq; struct rb_node *rb; + char hdr[80]; u64 addr; drm_printf(m, "%s\n", engine->name); @@ -1786,12 +1787,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) rq = port_unpack(&execlists->port[idx], &count); if (rq) { - drm_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); + snprintf(hdr, sizeof(hdr), + "\t\tELSP[%d] count=%d, rq: ", + idx, count); + print_request(m, rq, hdr); } else { - drm_printf(m, "\t\tELSP[%d] idle\n", - idx); + drm_printf(m, "\t\tELSP[%d] idle\n", idx); } } drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); From 0db18b17c8bca02638dcdc55dd458e04e18947b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 01:23:00 +0000 Subject: [PATCH 45/87] drm/i915: Make engine state pretty-printer header configurable Pass in a format string (and args) to specify the header to be emitted along with the engine state when pretty-printing. This allows the header to be emitted inside the drm_printer stream, so sharing the same prefix and output characteristics (e.g. debug level and filtering). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171208012303.25504-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 15 ++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++++- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 7 ++++--- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7b41a1799a03..9ccc6bcc7069 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3213,7 +3213,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) p = drm_seq_file_printer(m); for_each_engine(engine, dev_priv, id) - intel_engine_dump(engine, &p); + intel_engine_dump(engine, &p, "%s\n", engine->name); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index dbd188a23cf1..60100ab13b42 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1596,7 +1596,7 @@ void intel_engines_park(struct drm_i915_private *i915) dev_err(i915->drm.dev, "%s is not idle before parking\n", engine->name); - intel_engine_dump(engine, &p); + intel_engine_dump(engine, &p, NULL); } if (engine->park) @@ -1666,7 +1666,9 @@ static void print_request(struct drm_printer *m, rq->timeline->common->name); } -void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...) { struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1677,7 +1679,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) char hdr[80]; u64 addr; - drm_printf(m, "%s\n", engine->name); + if (header) { + va_list ap; + + va_start(ap, header); + drm_vprintf(m, header, &ap); + va_end(ap); + } + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 183165b9b3fb..c5ff203e42d6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1013,7 +1013,10 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); -void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); +__printf(3, 4) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...); struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 1bbb8c46e2d9..f98546b8a7fa 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -619,7 +619,7 @@ static int igt_wait_reset(void *arg) pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -714,7 +714,8 @@ static int igt_reset_queue(void *arg) pr_err("Failed to start request %x, at %x\n", prev->fence.seqno, hws_seqno(&h, prev)); - intel_engine_dump(rq->engine, &p); + intel_engine_dump(prev->engine, &p, + "%s\n", prev->engine->name); i915_gem_request_put(rq); i915_gem_request_put(prev); @@ -820,7 +821,7 @@ static int igt_handle_error(void *arg) pr_err("Failed to start request %x, at %x\n", rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); i915_gem_set_wedged(i915); From 832265d38cf4d27984b84036b38568d71917dc8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 01:23:01 +0000 Subject: [PATCH 46/87] drm/i915: Include engine state on detecting a missed breadcrumb/seqno Now that we have a common engine state pretty printer, we can use that instead of the adhoc information printed when we miss a breadcrumb. v2: Rearrange intel_engine_disarm_breadcrumbs() to avoid calling intel_engine_dump() under the rb spinlock (Mika) and to pretty-print the error state early so that we include the full list of waiters. v3: Pass missed breadcrumb msg to pretty-printer as the header v4: Preserve DRM_DEBUG_DRIVER filtering. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171208012303.25504-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 25 +++++++++++++----------- drivers/gpu/drm/i915/intel_engine_cs.c | 6 ++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5ae2d276f7f3..24c6fefdd0b1 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -64,12 +64,13 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pS, irq posted? %s, current seqno=%x, last=%x\n", - engine->name, __builtin_return_address(0), - yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine)); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, + "%s missed breadcrumb at %pS\n", + engine->name, __builtin_return_address(0)); + } set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -213,28 +214,30 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n, *first; + struct intel_wait *wait, *n; if (!b->irq_armed) return; - /* We only disarm the irq when we are idle (all requests completed), + /* + * We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request * completion. */ + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) + missed_breadcrumb(engine); spin_lock_irq(&b->rb_lock); spin_lock(&b->irq_lock); - first = fetch_and_zero(&b->irq_wait); + b->irq_wait = NULL; if (b->irq_armed) __intel_engine_disarm_breadcrumbs(engine); spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { RB_CLEAR_NODE(&wait->node); - if (wake_up_process(wait->tsk) && wait == first) - missed_breadcrumb(engine); + wake_up_process(wait->tsk); } b->waiters = RB_ROOT; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 60100ab13b42..ac62ca490551 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1836,6 +1836,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, } spin_unlock_irq(&b->rb_lock); + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + engine->irq_posted, + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); drm_printf(m, "\n"); } From 528dd16a7c228e1383a291781744ed2c0ac99429 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 01:23:02 +0000 Subject: [PATCH 47/87] drm/i915: Include the global reset count for intel_engine_dump() Since a global reset affects the engine, include that along side the per-engine reset counter when pretty printing the engine state in intel_engine_dump(). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171208012303.25504-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ac62ca490551..25fae8ff0e67 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1693,8 +1693,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, engine->hangcheck.seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), engine->timeline->inflight_seqnos); - drm_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); + drm_printf(m, "\tReset count: %d (global %d)\n", + i915_reset_engine_count(error, engine), + i915_reset_count(error)); rcu_read_lock(); From 2d8d1afb4d7d6d88a1ac19010d9a1a9594188ce3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 01:23:03 +0000 Subject: [PATCH 48/87] drm/i915: Add is-wedged flag to intel_engine_dump() Comparing the state tested by intel_engine_is_idle() and printed by intel_engine_dump(), the only bit not shown is whether or not the device is wedged. Add that little bit of information to the pretty printer so that if the engine fails to idle we can see why. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20171208012303.25504-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 25fae8ff0e67..04c31475a7ae 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1687,6 +1687,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, va_end(ap); } + if (i915_terminally_wedged(&engine->i915->gpu_error)) + drm_printf(m, "*** WEDGED ***\n"); + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), From b9dfda561585b3c1cf4381c9719fee860bfd2428 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Dec 2017 10:44:17 +0000 Subject: [PATCH 49/87] drm/i915: Remove debugfs/i915_seqno_info The per-engine seqno info is now available from debugfs/i915_engine_info obsoleting debugfs/i915_seqno_info, so remove it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171209104418.4223-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9ccc6bcc7069..bebf33333ae3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -683,19 +683,6 @@ static void i915_ring_seqno_info(struct seq_file *m, spin_unlock_irq(&b->rb_lock); } -static int i915_gem_seqno_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - i915_ring_seqno_info(m, engine); - - return 0; -} - - static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4672,7 +4659,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, From d5acadfe7de71fac2a562df0b0a1d37973b50dd3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Dec 2017 10:44:18 +0000 Subject: [PATCH 50/87] drm/i915: Stop showing seqno info from debugfs/i915_interrupt_info Since the seqno information shown from i915_interrupt_info is just a small subset of i915_engine_info, remove it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171209104418.4223-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 24 ++---------------------- drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++++ 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bebf33333ae3..f91dd68c53a1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -664,25 +664,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } -static void i915_ring_seqno_info(struct seq_file *m, - struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - - seq_printf(m, "Current sequence (%s): %x\n", - engine->name, intel_engine_get_seqno(engine)); - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "Waiting (%s): %s [%d] on %x\n", - engine->name, w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); -} - static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -883,13 +864,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Graphics Interrupt mask: %08x\n", I915_READ(GTIMR)); } - for_each_engine(engine, dev_priv, id) { - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { + for_each_engine(engine, dev_priv, id) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); } - i915_ring_seqno_info(m, engine); } intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 04c31475a7ae..aad353195f17 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1840,6 +1840,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, } spin_unlock_irq(&b->rb_lock); + if (INTEL_GEN(dev_priv) >= 6) { + drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); + } + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", engine->irq_posted, yesno(test_bit(ENGINE_IRQ_BREADCRUMB, From eb3f05171c2e84f0114403df0fea942479fdaa3e Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Mon, 11 Dec 2017 17:15:02 +0800 Subject: [PATCH 51/87] drm/i915/gvt: refine function emulate_mmio_read/write These 2 functions are coded by multiple person in multiple patches. The 'return' and 'goto err' are mix-used in same place, which cause the function looks disorder. Unify to use only 'goto' so that the gvt lock is acquired in one place and released in one place. Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 36 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4ea0feb5f04d..f7227a3ad469 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -157,7 +157,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; - if (vgpu->failsafe) { failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); return 0; @@ -166,8 +165,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (vgpu_gpa_is_aperture(vgpu, pa)) { ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, true); - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { @@ -183,8 +181,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ret, t->gfn, pa, *(u32 *)p_data, bytes); } - mutex_unlock(&gvt->lock); - return ret; + goto out; } } @@ -205,14 +202,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON(!reg_is_mmio(gvt, offset + bytes - 1))) @@ -228,11 +223,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; + err: gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } @@ -263,8 +260,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if (vgpu_gpa_is_aperture(vgpu, pa)) { ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, false); - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { @@ -280,8 +276,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, ret, t->gfn, pa, *(u32 *)p_data, bytes); } - mutex_unlock(&gvt->lock); - return ret; + goto out; } } @@ -302,14 +297,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_write_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false); @@ -317,11 +310,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; err: gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } From 461bd6227ede277138bf33c2156b6ebd1fba04c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 9 Dec 2017 00:37:59 -0600 Subject: [PATCH 52/87] drm/i915/gvt/fb_decoder: Fix out-of-bounds read In case function skl_format_to_drm returns -EINVAL, fmt turns into a huge number as fmt is of type u32, hence there is an out-of-bounds read when using fmt as an index for array skl_pixel_formats at line 225: plane->bpp = skl_pixel_formats[fmt].bpp; Fix this by comparing the value returned by function skl_format_to_drm against the size of array skl_pixel_formats, so in case it is greater than or equal to the number of items contained in skl_pixel_formats, print an error message and return -EINVAL. Addresses-Coverity-ID: 1462495 Addresses-Coverity-ID: 1462502 ("Out-of-bounds read") Fixes: 9f31d1063b43 ("drm/i915/gvt: Add framebuffer decoder support") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/fb_decoder.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 72f42176f35c..6cc99543693f 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -222,6 +222,12 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, val & PLANE_CTL_ORDER_RGBX, val & PLANE_CTL_ALPHA_MASK, val & PLANE_CTL_YUV422_ORDER_MASK); + + if (fmt >= ARRAY_SIZE(skl_pixel_formats)) { + gvt_vgpu_err("Out-of-bounds pixel format index\n"); + return -EINVAL; + } + plane->bpp = skl_pixel_formats[fmt].bpp; plane->drm_format = skl_pixel_formats[fmt].drm_format; } else { From 8516673a996870ea0ceb337ee4f83c33c5ec3111 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 21:46:16 +0000 Subject: [PATCH 53/87] agp/intel: Flush all chipset writes after updating the GGTT Before accessing the GGTT we must flush the PTE writes and make them visible to the chipset, or else the indirect access may end up in the wrong page. In commit 3497971a71d8 ("agp/intel: Flush chipset writes after updating a single PTE"), we noticed corruption of the uploads for pwrite and for capturing GPU error states, but it was presumed that the explicit calls to intel_gtt_chipset_flush() were sufficient for the execbuffer path. However, we have not been flushing the chipset between the PTE writes and access via the GTT itself. For simplicity, do the flush after any PTE update rather than try and batch the flushes on a just-in-time basis. References: 3497971a71d8 ("agp/intel: Flush chipset writes after updating a single PTE") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: drm-intel-fixes@lists.freedesktop.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171208214616.30147-1-chris@chris-wilson.co.uk --- drivers/char/agp/intel-gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 9b6b6023193b..dde7caac7f9f 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -872,6 +872,8 @@ void intel_gtt_insert_sg_entries(struct sg_table *st, } } wmb(); + if (intel_private.driver->chipset_flush) + intel_private.driver->chipset_flush(); } EXPORT_SYMBOL(intel_gtt_insert_sg_entries); From 19553d57dcddf659133739cd073bf2049f2b0ea9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Dec 2017 21:08:35 +0000 Subject: [PATCH 54/87] drm/i915/selftests: Free mock_i915->drm.mode_config Even for the mock i915 device, we need to initialise the drm.mode_config, as we may ultimately query whether there are any KMS users deep in the bowels of some paths (e.g. eviction). As we initialise drm.mode_config we must cleanup after ourselves! Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171209210835.32609-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 80f152aaedf9..1bc61f3f76fc 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -85,6 +85,8 @@ static void mock_device_release(struct drm_device *dev) i915_gemfs_fini(i915); + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -187,7 +189,7 @@ struct drm_i915_private *mock_gem_device(void) i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) - goto put_device; + goto err_drv; mock_init_contexts(i915); @@ -266,6 +268,9 @@ struct drm_i915_private *mock_gem_device(void) kmem_cache_destroy(i915->objects); err_wq: destroy_workqueue(i915->wq); +err_drv: + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device: put_device(&pdev->dev); err: From 776bc27fd8ab67a675cb0041d3af361af5d0e290 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 12:10:33 +0000 Subject: [PATCH 55/87] drm/i915: Stop listening to request resubmission from the signaler kthread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent here was that we would be listening to i915_gem_request_unsubmit in order to cancel the signaler quickly and release the reference on the request. Cancelling the signaler is done directly via intel_engine_cancel_signaling (called from unsubmit), but that does not directly wake up the signaling thread, and neither does setting the request->global_seqno back to zero wake up listeners to the request->execute waitqueue. So the only time that listening to the request->execute waitqueue would wake up the signaling kthread would be on the request resubmission, during which time we would already receive wake ups from rejoining the global breadcrumbs wait rbtree. Trying to wake up to release the request remains an issue. If the signaling was cancelled and no other request required signaling, then it is possible for us to shutdown with the reference on the request still held. To ensure that we do not try to shutdown, leaking that request, we kick the signaling threads whenever we disarm the breadcrumbs, i.e. on parking the engine when idle. v2: We do need to be sure to release the last reference on stopping the kthread; asserting that it has been dropped already is insufficient. Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution queue") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171208121033.5236-1-chris@chris-wilson.co.uk Acked-by: Daniel Vetter Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 24c6fefdd0b1..a7740696114d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -217,7 +217,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n; if (!b->irq_armed) - return; + goto wakeup_signaler; /* * We only disarm the irq when we are idle (all requests completed), @@ -242,6 +242,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. + */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -686,23 +694,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); From b92326a04071ed5a02bc31c2359da2cdadde743c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Dec 2017 12:47:10 +0000 Subject: [PATCH 56/87] drm/i915: Only report a wakeup if the waiter was truly asleep If we attempt to wake up a waiter, who is currently checking the seqno it will be in the TASK_INTERRUPTIBLE state and ttwu will report success. However, it is actually awake and functioning -- so delay reporting the actual wake up until it sleeps. This fixes some spurious claims of missed_breadcrumbs when running under heavy load; i.e. sufficient load to preempt away the newly woken waiter before they complete their checks. However, it does so at the cost of a rare false negative; where the waiter changes between the check and ttwu -- the only way to fix that would be to extend the reporting from ttwu where the check could be done atomically. v2: Defend against !CONFIG_SMP v3: Don't filter out calls to wake_up_process v4: Drop risky microoptimisation to skip wakeups Testcase: igt/drv_missed_irq # sanity check we do detect missed_breadcrumb() Testcase: igt/gem_concurrent_blit # for generating false positives References: https://bugs.freedesktop.org/show_bug.cgi?id=100007 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171209124710.1606-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 ++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index a7740696114d..58c624f982d9 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,6 +27,12 @@ #include "i915_drv.h" +#ifdef CONFIG_SMP +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_cpu) +#else +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL) +#endif + static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; @@ -36,8 +42,20 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) wait = b->irq_wait; if (wait) { + /* + * N.B. Since task_asleep() and ttwu are not atomic, the + * waiter may actually go to sleep after the check, causing + * us to suppress a valid wakeup. We prefer to reduce the + * number of false positive missed_breadcrumb() warnings + * at the expense of a few false negatives, as it it easy + * to trigger a false positive under heavy load. Enough + * signal should remain from genuine missed_breadcrumb() + * for us to detect in CI. + */ + bool was_asleep = task_asleep(wait->tsk); + result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk)) + if (wake_up_process(wait->tsk) && was_asleep) result |= ENGINE_WAKEUP_ASLEEP; } @@ -77,8 +95,8 @@ static noinline void missed_breadcrumb(struct intel_engine_cs *engine) static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.hangcheck); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -104,7 +122,7 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { missed_breadcrumb(engine); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); } From 6f9fa996c9bf3d86406df95982c7b3def38eb76f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Mon, 11 Dec 2017 15:18:14 +0000 Subject: [PATCH 57/87] x86/early-quirks: Extend Intel graphics stolen memory placement to 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To give upcoming SKU BIOSes more flexibility in placing the Intel graphics stolen memory, make all variables storing the placement or size compatible with full 64 bit range. Signed-off-by: Joonas Lahtinen Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Ville Syrjälä Cc: Chris Wilson Cc: Paulo Zanoni Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Chris Wilson Acked-by: Ingo Molnar Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-2-matthew.auld@intel.com --- arch/x86/kernel/early-quirks.c | 75 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1e82f787c160..a18de5fb9e68 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -243,7 +243,7 @@ static void __init intel_remapping_check(int num, int slot, int func) #define KB(x) ((x) * 1024UL) #define MB(x) (KB (KB (x))) -static size_t __init i830_tseg_size(void) +static resource_size_t __init i830_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); @@ -256,7 +256,7 @@ static size_t __init i830_tseg_size(void) return KB(512); } -static size_t __init i845_tseg_size(void) +static resource_size_t __init i845_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK; @@ -273,7 +273,7 @@ static size_t __init i845_tseg_size(void) return 0; } -static size_t __init i85x_tseg_size(void) +static resource_size_t __init i85x_tseg_size(void) { u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); @@ -283,12 +283,12 @@ static size_t __init i85x_tseg_size(void) return MB(1); } -static size_t __init i830_mem_size(void) +static resource_size_t __init i830_mem_size(void) { return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); } -static size_t __init i85x_mem_size(void) +static resource_size_t __init i85x_mem_size(void) { return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); } @@ -297,36 +297,36 @@ static size_t __init i85x_mem_size(void) * On 830/845/85x the stolen memory base isn't available in any * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. */ -static phys_addr_t __init i830_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i830_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size; + return i830_mem_size() - i830_tseg_size() - stolen_size; } -static phys_addr_t __init i845_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i845_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size; + return i830_mem_size() - i845_tseg_size() - stolen_size; } -static phys_addr_t __init i85x_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i85x_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { - return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size; + return i85x_mem_size() - i85x_tseg_size() - stolen_size; } -static phys_addr_t __init i865_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init i865_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u16 toud = 0; toud = read_pci_config_16(0, 0, 0, I865_TOUD); - return (phys_addr_t)(toud << 16) + i845_tseg_size(); + return toud * KB(64) + i845_tseg_size(); } -static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, - size_t stolen_size) +static resource_size_t __init gen3_stolen_base(int num, int slot, int func, + resource_size_t stolen_size) { u32 bsm; @@ -337,10 +337,10 @@ static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, */ bsm = read_pci_config(num, slot, func, INTEL_BSM); - return (phys_addr_t)bsm & INTEL_BSM_MASK; + return bsm & INTEL_BSM_MASK; } -static size_t __init i830_stolen_size(int num, int slot, int func) +static resource_size_t __init i830_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -361,7 +361,7 @@ static size_t __init i830_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen3_stolen_size(int num, int slot, int func) +static resource_size_t __init gen3_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -390,7 +390,7 @@ static size_t __init gen3_stolen_size(int num, int slot, int func) return 0; } -static size_t __init gen6_stolen_size(int num, int slot, int func) +static resource_size_t __init gen6_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -398,10 +398,10 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init gen8_stolen_size(int num, int slot, int func) +static resource_size_t __init gen8_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -409,10 +409,10 @@ static size_t __init gen8_stolen_size(int num, int slot, int func) gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK; - return (size_t)gms * MB(32); + return gms * MB(32); } -static size_t __init chv_stolen_size(int num, int slot, int func) +static resource_size_t __init chv_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -426,14 +426,14 @@ static size_t __init chv_stolen_size(int num, int slot, int func) * 0x17 to 0x1d: 4MB increments start at 36MB */ if (gms < 0x11) - return (size_t)gms * MB(32); + return gms * MB(32); else if (gms < 0x17) - return (size_t)(gms - 0x11 + 2) * MB(4); + return (gms - 0x11 + 2) * MB(4); else - return (size_t)(gms - 0x17 + 9) * MB(4); + return (gms - 0x17 + 9) * MB(4); } -static size_t __init gen9_stolen_size(int num, int slot, int func) +static resource_size_t __init gen9_stolen_size(int num, int slot, int func) { u16 gmch_ctrl; u16 gms; @@ -444,14 +444,15 @@ static size_t __init gen9_stolen_size(int num, int slot, int func) /* 0x0 to 0xef: 32MB increments starting at 0MB */ /* 0xf0 to 0xfe: 4MB increments starting at 4MB */ if (gms < 0xf0) - return (size_t)gms * MB(32); + return gms * MB(32); else - return (size_t)(gms - 0xf0 + 1) * MB(4); + return (gms - 0xf0 + 1) * MB(4); } struct intel_early_ops { - size_t (*stolen_size)(int num, int slot, int func); - phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size); + resource_size_t (*stolen_size)(int num, int slot, int func); + resource_size_t (*stolen_base)(int num, int slot, int func, + resource_size_t size); }; static const struct intel_early_ops i830_early_ops __initconst = { @@ -535,8 +536,8 @@ static void __init intel_graphics_stolen(int num, int slot, int func, const struct intel_early_ops *early_ops) { - phys_addr_t base, end; - size_t size; + resource_size_t base, size; + resource_size_t end; size = early_ops->stolen_size(num, slot, func); base = early_ops->stolen_base(num, slot, func, size); From 55f56fc46020ea59273ddb5de0bdc41b7da45c73 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:15 +0000 Subject: [PATCH 58/87] x86/early-quirks: export the stolen region as a resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We duplicate the stolen discovery code in early-quirks and in i915, however if we just export the region as a resource from early-quirks we can nuke the duplication. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Ville Syrjälä Cc: Chris Wilson Cc: Paulo Zanoni Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Chris Wilson Acked-by: Ingo Molnar Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-3-matthew.auld@intel.com --- arch/x86/kernel/early-quirks.c | 11 +++++++++-- include/drm/i915_drm.h | 3 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a18de5fb9e68..6c1624889011 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -532,6 +532,9 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_CNL_IDS(&gen9_early_ops), }; +struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); +EXPORT_SYMBOL(intel_graphics_stolen_res); + static void __init intel_graphics_stolen(int num, int slot, int func, const struct intel_early_ops *early_ops) @@ -546,8 +549,12 @@ intel_graphics_stolen(int num, int slot, int func, return; end = base + size - 1; - printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n", - &base, &end); + + intel_graphics_stolen_res.start = base; + intel_graphics_stolen_res.end = end; + + printk(KERN_INFO "Reserving Intel graphics memory at %pR\n", + &intel_graphics_stolen_res); /* Mark this space as reserved */ e820__range_add(base, size, E820_TYPE_RESERVED); diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 4e1b274e1164..c9e5a6621b95 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -36,6 +36,9 @@ extern bool i915_gpu_lower(void); extern bool i915_gpu_busy(void); extern bool i915_gpu_turbo_disable(void); +/* Exported from arch/x86/kernel/early-quirks.c */ +extern struct resource intel_graphics_stolen_res; + /* * The Bridge device's PCI config space has information about the * fb aperture size and the amount of pre-reserved memory. From 3b51b6f31aa81c435601ab7eed244a120731c781 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:16 +0000 Subject: [PATCH 59/87] x86/early-quirks: replace the magical increment start values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the magical +2, +9 etc. with +MB, which is far easier to read. Suggested-by: Ville Syrjälä Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Ville Syrjälä Cc: Chris Wilson Cc: Paulo Zanoni Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Ville Syrjälä Acked-by: Ingo Molnar Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-4-matthew.auld@intel.com --- arch/x86/kernel/early-quirks.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6c1624889011..3cbb2c78a9df 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -428,9 +428,9 @@ static resource_size_t __init chv_stolen_size(int num, int slot, int func) if (gms < 0x11) return gms * MB(32); else if (gms < 0x17) - return (gms - 0x11 + 2) * MB(4); + return (gms - 0x11) * MB(4) + MB(8); else - return (gms - 0x17 + 9) * MB(4); + return (gms - 0x17) * MB(4) + MB(36); } static resource_size_t __init gen9_stolen_size(int num, int slot, int func) @@ -446,7 +446,7 @@ static resource_size_t __init gen9_stolen_size(int num, int slot, int func) if (gms < 0xf0) return gms * MB(32); else - return (gms - 0xf0 + 1) * MB(4); + return (gms - 0xf0) * MB(4) + MB(4); } struct intel_early_ops { From f773568b6ff8c31fce165d177a968f2ae31d10bb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:17 +0000 Subject: [PATCH 60/87] drm/i915: nuke the duplicated stolen discovery We duplicate the stolen discovery code in early-quirks and in i915, however now that the stolen region is exported as a resource from early-quirks we can nuke the duplication. v2: check overflows_type Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-5-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 51 +----------- drivers/gpu/drm/i915/i915_gem_stolen.c | 109 +------------------------ 2 files changed, 5 insertions(+), 155 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5e7efbbac9f7..c1b8b1c8401e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2949,50 +2949,6 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) return 0; } -static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GMS_MASK; - return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; - return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t chv_get_stolen_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GMS_MASK; - - /* - * 0x0 to 0x10: 32MB increments starting at 0MB - * 0x11 to 0x16: 4MB increments starting at 8MB - * 0x17 to 0x1d: 4MB increments start at 36MB - */ - if (gmch_ctrl < 0x11) - return (size_t)gmch_ctrl << 25; - else if (gmch_ctrl < 0x17) - return (size_t)(gmch_ctrl - 0x11 + 2) << 22; - else - return (size_t)(gmch_ctrl - 0x17 + 9) << 22; -} - -static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) -{ - gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; - - if (gen9_gmch_ctl < 0xf0) - return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ - else - /* 4MB increments starting at 0xf0 for 4MB */ - return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; -} - static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { struct drm_i915_private *dev_priv = ggtt->base.i915; @@ -3343,14 +3299,13 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + ggtt->stolen_size = resource_size(&intel_graphics_stolen_res); + if (INTEL_GEN(dev_priv) >= 9) { - ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); size = gen8_get_total_gtt_size(snb_gmch_ctl); } else if (IS_CHERRYVIEW(dev_priv)) { - ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); size = chv_get_total_gtt_size(snb_gmch_ctl); } else { - ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); size = gen8_get_total_gtt_size(snb_gmch_ctl); } @@ -3408,7 +3363,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); + ggtt->stolen_size = resource_size(&intel_graphics_stolen_res); size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1877ae9a1d9b..f8ac1438c35d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -30,9 +30,6 @@ #include #include "i915_drv.h" -#define KB(x) ((x) * 1024) -#define MB(x) (KB(x) * 1024) - /* * The BIOS typically reserves some of the system's memory for the exclusive * use of the integrated graphics. This memory is no longer available for @@ -81,113 +78,11 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; + dma_addr_t base = intel_graphics_stolen_res.start; struct resource *r; - dma_addr_t base; - /* Almost universally we can find the Graphics Base of Stolen Memory - * at register BSM (0x5c) in the igfx configuration space. On a few - * (desktop) machines this is also mirrored in the bridge device at - * different locations, or in the MCHBAR. - * - * On 865 we just check the TOUD register. - * - * On 830/845/85x the stolen memory base isn't available in any - * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. - * - */ - base = 0; - if (INTEL_GEN(dev_priv) >= 3) { - u32 bsm; - - pci_read_config_dword(pdev, INTEL_BSM, &bsm); - - base = bsm & INTEL_BSM_MASK; - } else if (IS_I865G(dev_priv)) { - u32 tseg_size = 0; - u16 toud = 0; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 0), - I865_TOUD, &toud); - - base = (toud << 16) + tseg_size; - } else if (IS_I85X(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I85X_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) - tseg_size = MB(1); - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 1), - I85X_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I845G(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I830(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - if (tmp & I830_TSEG_SIZE_1M) - tseg_size = MB(1); - else - tseg_size = KB(512); - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } + GEM_BUG_ON(overflows_type(intel_graphics_stolen_res.start, base)); if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; From 7789422665f59982743a32a7728a448c9ddd4003 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:18 +0000 Subject: [PATCH 61/87] drm/i915: make dsm struct resource centric Now that we are using struct resource to track the stolen region, it is more convenient if we track dsm in a resource as well. v2: check range_overflow when writing to 32b registers (Chris) pepper in some comments (Chris) v3: refit i915_stolen_to_dma() v4: kill ggtt->stolen_size v5: some more polish Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-6-matthew.auld@intel.com --- drivers/char/agp/intel-gtt.c | 2 - drivers/gpu/drm/i915/i915_drv.h | 12 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/i915_gem_stolen.c | 121 ++++++++++++------------- drivers/gpu/drm/i915/intel_fbc.c | 13 ++- drivers/gpu/drm/i915/intel_pm.c | 15 +-- include/drm/intel-gtt.h | 1 - 8 files changed, 87 insertions(+), 86 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index dde7caac7f9f..0c86b4dfd59c 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1424,12 +1424,10 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, EXPORT_SYMBOL(intel_gmch_probe); void intel_gtt_get(u64 *gtt_total, - u32 *stolen_size, phys_addr_t *mappable_base, u64 *mappable_end) { *gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT; - *stolen_size = intel_private.stolen_size; *mappable_base = intel_private.gma_bus_addr; *mappable_end = intel_private.gtt_mappable_entries << PAGE_SHIFT; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d57859cfad8e..1677b7e0d671 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1538,9 +1538,6 @@ struct i915_gem_mm { */ struct pagevec wc_stash; - /** Usable portion of the GTT for GEM */ - dma_addr_t stolen_base; /* limited to low memory (32-bit) */ - /** * tmpfs instance used for shmem backed objects */ @@ -2254,6 +2251,15 @@ struct drm_i915_private { const struct intel_device_info info; + /** + * Data Stolen Memory - aka "i915 stolen memory" gives us the start and + * end of stolen which we can optionally use to create GEM objects + * backed by stolen memory. Note that ggtt->stolen_usable_size tells us + * exactly how much of this we are actually allowed to use, given that + * some portion of it is in fact reserved for use by hardware functions. + */ + struct resource dsm; + void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c1b8b1c8401e..7a5302318d31 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3299,8 +3299,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - ggtt->stolen_size = resource_size(&intel_graphics_stolen_res); - if (INTEL_GEN(dev_priv) >= 9) { size = gen8_get_total_gtt_size(snb_gmch_ctl); } else if (IS_CHERRYVIEW(dev_priv)) { @@ -3363,8 +3361,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - ggtt->stolen_size = resource_size(&intel_graphics_stolen_res); - size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; @@ -3410,7 +3406,6 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) } intel_gtt_get(&ggtt->base.total, - &ggtt->stolen_size, &ggtt->mappable_base, &ggtt->mappable_end); @@ -3482,7 +3477,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) DRM_INFO("Memory usable by graphics device = %lluM\n", ggtt->base.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); + DRM_DEBUG_DRIVER("GTT stolen size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 93211a96fdad..30a2920b1291 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -381,7 +381,6 @@ struct i915_ggtt { * avoid the first page! The upper end of stolen memory is reserved for * hardware functions and similarly removed from the accessible range. */ - u32 stolen_size; /* Total size of stolen memory */ u32 stolen_usable_size; /* Total size minus reserved ranges */ u32 stolen_reserved_base; u32 stolen_reserved_size; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f8ac1438c35d..5b5558fe70cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -76,27 +76,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) +static int i915_adjust_stolen(struct drm_i915_private *dev_priv, + struct resource *dsm) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - dma_addr_t base = intel_graphics_stolen_res.start; struct resource *r; - GEM_BUG_ON(overflows_type(intel_graphics_stolen_res.start, base)); + if (dsm->start == 0 || dsm->end <= dsm->start) + return -EINVAL; - if (base == 0 || add_overflows(base, ggtt->stolen_size)) - return 0; + /* + * TODO: We have yet too encounter the case where the GTT wasn't at the + * end of stolen. With that assumption we could simplify this. + */ - /* make sure we don't clobber the GTT if it's within stolen memory */ + /* Make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { - struct { - dma_addr_t start, end; - } stolen[2] = { - { .start = base, .end = base + ggtt->stolen_size, }, - { .start = base, .end = base + ggtt->stolen_size, }, - }; - u64 ggtt_start, ggtt_end; + struct resource stolen[2] = {*dsm, *dsm}; + struct resource ggtt_res; + u64 ggtt_start; ggtt_start = I915_READ(PGTBL_CTL); if (IS_GEN4(dev_priv)) @@ -104,70 +103,64 @@ static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else ggtt_start &= PGTBL_ADDRESS_LO_MASK; - ggtt_end = ggtt_start + ggtt_total_entries(ggtt) * 4; - if (ggtt_start >= stolen[0].start && ggtt_start < stolen[0].end) - stolen[0].end = ggtt_start; - if (ggtt_end > stolen[1].start && ggtt_end <= stolen[1].end) - stolen[1].start = ggtt_end; + ggtt_res = + (struct resource) DEFINE_RES_MEM(ggtt_start, + ggtt_total_entries(ggtt) * 4); - /* pick the larger of the two chunks */ - if (stolen[0].end - stolen[0].start > - stolen[1].end - stolen[1].start) { - base = stolen[0].start; - ggtt->stolen_size = stolen[0].end - stolen[0].start; - } else { - base = stolen[1].start; - ggtt->stolen_size = stolen[1].end - stolen[1].start; - } + if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end) + stolen[0].end = ggtt_res.start; + if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end) + stolen[1].start = ggtt_res.end; + + /* Pick the larger of the two chunks */ + if (resource_size(&stolen[0]) > resource_size(&stolen[1])) + *dsm = stolen[0]; + else + *dsm = stolen[1]; if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - dma_addr_t end = base + ggtt->stolen_size - 1; - - DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", - (unsigned long long)ggtt_start, - (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", - &base, &end); + DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res); + DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm); } } - - /* Verify that nothing else uses this physical address. Stolen + /* + * Verify that nothing else uses this physical address. Stolen * memory should be reserved by the BIOS and hidden from the * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, base, ggtt->stolen_size, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { /* * One more attempt but this time requesting region from - * base + 1, as we have seen that this resolves the region + * start + 1, as we have seen that this resolves the region * conflict with the PCI Bus. * This is a BIOS w/a: Some BIOS wrap stolen in the root * PCI bus, but have an off-by-one error. Hence retry the * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, base + 1, - ggtt->stolen_size - 2, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - dma_addr_t end = base + ggtt->stolen_size; + DRM_ERROR("conflict detected with stolen region: %pR\n", + dsm); - DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", - &base, &end); - base = 0; + return -EBUSY; } } - return base; + return 0; } void i915_gem_cleanup_stolen(struct drm_device *dev) @@ -183,11 +176,10 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, dma_addr_t *base, u32 *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->dsm.end + 1; if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { *base = 0; @@ -308,7 +300,6 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, dma_addr_t *base, u32 *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); dma_addr_t stolen_top; @@ -318,7 +309,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, return; } - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + stolen_top = dev_priv->dsm.end + 1; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -351,14 +342,18 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } - if (ggtt->stolen_size == 0) + if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); - if (dev_priv->mm.stolen_base == 0) + dev_priv->dsm = intel_graphics_stolen_res; + + if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) return 0; - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + GEM_BUG_ON(dev_priv->dsm.start == 0); + GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + + stolen_top = dev_priv->dsm.end + 1; reserved_base = 0; reserved_size = 0; @@ -399,12 +394,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_base = stolen_top; } - if (reserved_base < dev_priv->mm.stolen_base || + if (reserved_base < dev_priv->dsm.start || reserved_base + reserved_size > stolen_top) { dma_addr_t reserved_top = reserved_base + reserved_size; - DRM_ERROR("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", - &reserved_base, &reserved_top, - &dev_priv->mm.stolen_base, &stolen_top); + DRM_ERROR("Stolen reserved area [%pad - %pad] outside stolen memory %pR\n", + &reserved_base, &reserved_top, &dev_priv->dsm); return 0; } @@ -415,9 +409,9 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_KMS("Memory reserved for graphics device: %uK, usable: %uK\n", - ggtt->stolen_size >> 10, - (ggtt->stolen_size - reserved_total) >> 10); + DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); stolen_usable_start = 0; /* WaSkipStolenMemoryFirstPage:bdw+ */ @@ -425,7 +419,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) stolen_usable_start = 4096; ggtt->stolen_usable_size = - ggtt->stolen_size - reserved_total - stolen_usable_start; + resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; /* Basic memrange allocator for stolen space. */ drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, @@ -442,7 +436,8 @@ i915_pages_create_for_stolen(struct drm_device *dev, struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, dev_priv->ggtt.stolen_size)); + GEM_BUG_ON(range_overflows_t(resource_size_t, offset, size, + resource_size(&dev_priv->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -462,7 +457,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->mm.stolen_base + offset; + sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; sg_dma_len(sg) = size; return st; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 4aefc658a5cf..f88c1b5dae4c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -531,7 +531,6 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, int size, int fb_cpp) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; int compression_threshold = 1; int ret; u64 end; @@ -541,7 +540,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) - end = ggtt->stolen_size - 8 * 1024 * 1024; + end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -615,10 +614,16 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) fbc->compressed_llb = compressed_llb; + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_fb.start, + U32_MAX)); + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_llb->start, + U32_MAX)); I915_WRITE(FBC_CFB_BASE, - dev_priv->mm.stolen_base + fbc->compressed_fb.start); + dev_priv->dsm.start + fbc->compressed_fb.start); I915_WRITE(FBC_LL_BASE, - dev_priv->mm.stolen_base + compressed_llb->start); + dev_priv->dsm.start + compressed_llb->start); } DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5836181d6f8a..79b3fd617de0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7020,7 +7020,7 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) { unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - WARN_ON(pctx_addr != dev_priv->mm.stolen_base + + WARN_ON(pctx_addr != dev_priv->dsm.start + dev_priv->vlv_pctx->stolen->start); } @@ -7035,7 +7035,6 @@ static void cherryview_check_pctx(struct drm_i915_private *dev_priv) static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long pctx_paddr, paddr; u32 pcbr; int pctx_size = 32*1024; @@ -7043,8 +7042,8 @@ static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) pcbr = I915_READ(VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = (dev_priv->mm.stolen_base + - (ggtt->stolen_size - pctx_size)); + paddr = dev_priv->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); pctx_paddr = (paddr & (~4095)); I915_WRITE(VLV_PCBR, pctx_paddr); @@ -7065,7 +7064,7 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) /* BIOS set it up already, grab the pre-alloc'd space */ int pcbr_offset; - pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; + pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, pcbr_offset, I915_GTT_OFFSET_NONE, @@ -7089,7 +7088,11 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) goto out; } - pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; + GEM_BUG_ON(range_overflows_t(u64, + dev_priv->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; I915_WRITE(VLV_PCBR, pctx_paddr); out: diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index c5db7975c640..8c577ca38e2e 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -5,7 +5,6 @@ #define _DRM_INTEL_GTT_H void intel_gtt_get(u64 *gtt_total, - u32 *stolen_size, phys_addr_t *mappable_base, u64 *mappable_end); From 17a053454b247e056214013a40cca7bdf4e340c2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:19 +0000 Subject: [PATCH 62/87] drm/i915: make reserved struct resource centric Now that we are using struct resource to track the stolen region, it is more convenient if we track the reserved portion of that region in a resource as well. v2: s/<= end + 1/< end/ (Chris) v3: prefer DEFINE_RES_MEM Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-7-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 -- drivers/gpu/drm/i915/i915_gem_stolen.c | 14 ++++++-------- drivers/gpu/drm/i915/intel_pm.c | 6 ++---- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1677b7e0d671..7945af0037f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2259,6 +2259,10 @@ struct drm_i915_private { * some portion of it is in fact reserved for use by hardware functions. */ struct resource dsm; + /** + * Reseved portion of Data Stolen Memory + */ + struct resource dsm_reserved; void __iomem *regs; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 30a2920b1291..db20c72ecfc8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -382,8 +382,6 @@ struct i915_ggtt { * hardware functions and similarly removed from the accessible range. */ u32 stolen_usable_size; /* Total size minus reserved ranges */ - u32 stolen_reserved_base; - u32 stolen_reserved_size; /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 5b5558fe70cc..b6cca0d4ecd3 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -394,17 +394,15 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_base = stolen_top; } - if (reserved_base < dev_priv->dsm.start || - reserved_base + reserved_size > stolen_top) { - dma_addr_t reserved_top = reserved_base + reserved_size; - DRM_ERROR("Stolen reserved area [%pad - %pad] outside stolen memory %pR\n", - &reserved_base, &reserved_top, &dev_priv->dsm); + dev_priv->dsm_reserved = + (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + + if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", + &dev_priv->dsm_reserved, &dev_priv->dsm); return 0; } - ggtt->stolen_reserved_base = reserved_base; - ggtt->stolen_reserved_size = reserved_size; - /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 79b3fd617de0..57dcf8e1ff30 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6416,7 +6416,6 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; bool enable_rc6 = true; unsigned long rc6_ctx_base; u32 rc_ctl; @@ -6441,9 +6440,8 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) * for this check. */ rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && - (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + - ggtt->stolen_reserved_size))) { + if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && + (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); enable_rc6 = false; } From 73ebd503034c1abe31137df02dd4493eb7a522d4 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:20 +0000 Subject: [PATCH 63/87] drm/i915: make mappable struct resource centric Now that we are using struct resource to track the stolen region, it is more convenient if we track the mappable region in a resource as well. v2: prefer iomap and gmadr naming scheme prefer DEFINE_RES_MEM Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-8-matthew.auld@intel.com --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 29 ++++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 +-- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 +-- drivers/gpu/drm/i915/selftests/mock_gtt.c | 4 +-- 12 files changed, 37 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 77df9bad5dea..103910a24e4b 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -348,7 +348,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); /* Aperture/GM space definitions for GVT device */ #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) -#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base) +#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.gmadr.start) #define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.base.total) #define gvt_ggtt_sz(gvt) \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5b1fd5f1defb..54a8fca7e7b2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -726,7 +726,7 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) if (!ap) return -ENOMEM; - ap->ranges[0].base = ggtt->mappable_base; + ap->ranges[0].base = ggtt->gmadr.start; ap->ranges[0].size = ggtt->mappable_end; primary = diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fcc9b53864f0..e89496aec857 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1116,7 +1116,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_base += offset & PAGE_MASK; } - if (gtt_user_read(&ggtt->mappable, page_base, page_offset, + if (gtt_user_read(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1324,7 +1324,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (ggtt_write(&ggtt->mappable, page_base, page_offset, + if (ggtt_write(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1967,9 +1967,9 @@ int i915_gem_fault(struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->mappable); + &ggtt->iomap); if (ret) goto err_fence; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 70ccd63cbf8e..4401068ff468 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1012,7 +1012,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset += page << PAGE_SHIFT; } - vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->mappable, + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7a5302318d31..fbc1f467273a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2912,7 +2912,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); - io_mapping_fini(&ggtt->mappable); + io_mapping_fini(&ggtt->iomap); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -3288,8 +3288,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); if (!err) @@ -3343,8 +3345,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) u16 snb_gmch_ctl; int err; - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. @@ -3397,6 +3401,7 @@ static void i915_gmch_remove(struct i915_address_space *vm) static int i915_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = ggtt->base.i915; + phys_addr_t gmadr_base; int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -3406,9 +3411,13 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) } intel_gtt_get(&ggtt->base.total, - &ggtt->mappable_base, + &gmadr_base, &ggtt->mappable_end); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(gmadr_base, + ggtt->mappable_end); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; @@ -3476,7 +3485,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) /* GMADR is the PCI mmio aperture into the global GTT. */ DRM_INFO("Memory usable by graphics device = %lluM\n", ggtt->base.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); DRM_DEBUG_DRIVER("GTT stolen size = %lluM\n", (u64)resource_size(&intel_graphics_stolen_res) >> 20); if (intel_vtd_active()) @@ -3507,14 +3516,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); - if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, - dev_priv->ggtt.mappable_base, + if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, + dev_priv->ggtt.gmadr.start, dev_priv->ggtt.mappable_end)) { ret = -EIO; goto out_gtt_cleanup; } - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); /* * Initialise stolen early so that we may reserve preallocated diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index db20c72ecfc8..4a17ce36281a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -368,9 +368,9 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) */ struct i915_ggtt { struct i915_address_space base; - struct io_mapping mappable; /* Mapping to our CPU mappable region */ - phys_addr_t mappable_base; /* PA of our GMADR */ + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ u64 mappable_end; /* End offset that we can CPU map */ /* Stolen memory is segmented in hardware with different portions diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 48418fb81066..aba50aa613f1 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -956,7 +956,7 @@ i915_error_object_create(struct drm_i915_private *i915, ggtt->base.insert_page(&ggtt->base, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); + s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 92c11e70fea4..e0e7c48f45dc 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -311,7 +311,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = vma->iomap; if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, vma->node.size); if (ptr == NULL) { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f0a8686f0513..f9ff1c7fa054 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14595,7 +14595,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.cursor_height = MAX_CURSOR_HEIGHT; } - dev->mode_config.fb_base = ggtt->mappable_base; + dev->mode_config.fb_base = ggtt->gmadr.start; DRM_DEBUG_KMS("%d display pipe%s available.\n", INTEL_INFO(dev_priv)->num_pipes, diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 1b397b41cb4f..41e9465d44a8 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -219,7 +219,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_wc(&dev_priv->ggtt.iomap, overlay->flip_addr, PAGE_SIZE); @@ -1508,7 +1508,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.iomap, overlay->flip_addr); return regs; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6491cf0a4f46..4a28d713a7d8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1074,7 +1074,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); @@ -1092,7 +1092,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); val = ioread32(vaddr + n); io_mapping_unmap_atomic(vaddr); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 336e1afb250f..e96873f96116 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -110,8 +110,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.i915 = i915; - ggtt->mappable_base = 0; - ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); + ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->base.total = 4096 * PAGE_SIZE; ggtt->base.clear_range = nop_clear_range; From b1ace60107e65b915a59b113f2aaeed8587f34fd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:21 +0000 Subject: [PATCH 64/87] drm/i915: give stolen_usable_size a more suitable home Kick it out of i915_ggtt and keep it grouped with dsm and dsm_reserved, where it makes the most sense. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-9-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++++++++++- drivers/gpu/drm/i915/i915_gem_gtt.h | 10 ---------- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++--- drivers/gpu/drm/i915/intel_display.c | 3 +-- drivers/gpu/drm/i915/intel_fbdev.c | 3 +-- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7945af0037f8..d71e59e44d50 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2254,7 +2254,7 @@ struct drm_i915_private { /** * Data Stolen Memory - aka "i915 stolen memory" gives us the start and * end of stolen which we can optionally use to create GEM objects - * backed by stolen memory. Note that ggtt->stolen_usable_size tells us + * backed by stolen memory. Note that stolen_usable_size tells us * exactly how much of this we are actually allowed to use, given that * some portion of it is in fact reserved for use by hardware functions. */ @@ -2264,6 +2264,17 @@ struct drm_i915_private { */ struct resource dsm_reserved; + /* + * Stolen memory is segmented in hardware with different portions + * offlimits to certain functions. + * + * The drm_mm is initialised to the total accessible range, as found + * from the PCI config. On Broadwell+, this is further restricted to + * avoid the first page! The upper end of stolen memory is reserved for + * hardware functions and similarly removed from the accessible range. + */ + u32 stolen_usable_size; /* Total size minus reserved ranges */ + void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 4a17ce36281a..e5aa07ceb627 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -373,16 +373,6 @@ struct i915_ggtt { struct resource gmadr; /* GMADR resource */ u64 mappable_end; /* End offset that we can CPU map */ - /* Stolen memory is segmented in hardware with different portions - * offlimits to certain functions. - * - * The drm_mm is initialised to the total accessible range, as found - * from the PCI config. On Broadwell+, this is further restricted to - * avoid the first page! The upper end of stolen memory is reserved for - * hardware functions and similarly removed from the accessible range. - */ - u32 stolen_usable_size; /* Total size minus reserved ranges */ - /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; void (*invalidate)(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index b6cca0d4ecd3..63d568512db5 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -325,7 +325,6 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -416,12 +415,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) stolen_usable_start = 4096; - ggtt->stolen_usable_size = + dev_priv->stolen_usable_size = resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; /* Basic memrange allocator for stolen space. */ drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - ggtt->stolen_usable_size); + dev_priv->stolen_usable_size); return 0; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f9ff1c7fa054..efa6c6d19664 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2639,7 +2639,6 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj = NULL; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; struct drm_framebuffer *fb = &plane_config->fb->base; @@ -2655,7 +2654,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ - if (size_aligned * 2 > ggtt->stolen_usable_size) + if (size_aligned * 2 > dev_priv->stolen_usable_size) return false; mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index ea96682568e8..da48af11eb6b 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -115,7 +115,6 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_framebuffer *fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size, ret; @@ -139,7 +138,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, * important and we should probably use that space with FBC or other * features. */ obj = NULL; - if (size * 2 < ggtt->stolen_usable_size) + if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) obj = i915_gem_object_create(dev_priv, size); From b7128ef125b400e42bab90155777e1def5bfcd31 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Dec 2017 15:18:22 +0000 Subject: [PATCH 65/87] drm/i915: prefer resource_size_t for everything stolen Keeps things consistent now that we make use of struct resource. This should keep us covered in case we ever get huge amounts of stolen memory. v2: bunch of missing conversions (Chris) Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Paulo Zanoni Reviewed-by: Chris Wilson Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211151822.20953-10-matthew.auld@intel.com --- drivers/char/agp/intel-gtt.c | 12 ++++---- drivers/gpu/drm/i915/i915_debugfs.c | 4 +-- drivers/gpu/drm/i915/i915_drv.h | 11 +++---- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 40 +++++++++++++------------- drivers/gpu/drm/i915/intel_pm.c | 10 +++---- include/drm/intel-gtt.h | 2 +- 8 files changed, 46 insertions(+), 45 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 0c86b4dfd59c..c6271ce250b3 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -80,7 +80,7 @@ static struct _intel_private { unsigned int needs_dmar : 1; phys_addr_t gma_bus_addr; /* Size of memory reserved for graphics by the BIOS */ - unsigned int stolen_size; + resource_size_t stolen_size; /* Total number of gtt entries. */ unsigned int gtt_total_entries; /* Part of the gtt that is mappable by the cpu, for those chips where @@ -333,13 +333,13 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } -static unsigned int intel_gtt_stolen_size(void) +static resource_size_t intel_gtt_stolen_size(void) { u16 gmch_ctrl; u8 rdct; int local = 0; static const int ddt[4] = { 0, 16, 32, 64 }; - unsigned int stolen_size = 0; + resource_size_t stolen_size = 0; if (INTEL_GTT_GEN == 1) return 0; /* no stolen mem on i81x */ @@ -417,8 +417,8 @@ static unsigned int intel_gtt_stolen_size(void) } if (stolen_size > 0) { - dev_info(&intel_private.bridge_dev->dev, "detected %dK %s memory\n", - stolen_size / KB(1), local ? "local" : "stolen"); + dev_info(&intel_private.bridge_dev->dev, "detected %lluK %s memory\n", + (u64)stolen_size / KB(1), local ? "local" : "stolen"); } else { dev_info(&intel_private.bridge_dev->dev, "no pre-allocated video memory detected\n"); @@ -1425,7 +1425,7 @@ EXPORT_SYMBOL(intel_gmch_probe); void intel_gtt_get(u64 *gtt_total, phys_addr_t *mappable_base, - u64 *mappable_end) + resource_size_t *mappable_end) { *gtt_total = intel_private.gtt_total_entries << PAGE_SHIFT; *mappable_base = intel_private.gma_bus_addr; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f91dd68c53a1..d8c6ec3cca71 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -522,8 +522,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_printf(m, "%u display objects (globally pinned), %llu bytes\n", dpy_count, dpy_size); - seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "%llu [%pa] gtt total\n", + ggtt->base.total, &ggtt->mappable_end); seq_printf(m, "Supported page sizes: %s\n", stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, buf, sizeof(buf))); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d71e59e44d50..61196ff93901 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2273,7 +2273,7 @@ struct drm_i915_private { * avoid the first page! The upper end of stolen memory is reserved for * hardware functions and similarly removed from the accessible range. */ - u32 stolen_usable_size; /* Total size minus reserved ranges */ + resource_size_t stolen_usable_size; /* Total size minus reserved ranges */ void __iomem *regs; @@ -3932,12 +3932,13 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size); +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size); + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); /* i915_gem_internal.c */ struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index fbc1f467273a..a7fe05666f72 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3354,7 +3354,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) * a coarse sanity check. */ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); return -ENXIO; } @@ -3464,7 +3464,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) */ if (USES_GUC(dev_priv)) { ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if ((ggtt->base.total - 1) >> 32) { @@ -3472,13 +3472,13 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if (ggtt->mappable_end > ggtt->base.total) { DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%llx, total=%llx\n", - ggtt->mappable_end, ggtt->base.total); + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->base.total); ggtt->mappable_end = ggtt->base.total; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e5aa07ceb627..a42890d9af38 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -371,7 +371,7 @@ struct i915_ggtt { struct io_mapping iomap; /* Mapping to our CPU mappable region */ struct resource gmadr; /* GMADR resource */ - u64 mappable_end; /* End offset that we can CPU map */ + resource_size_t mappable_end; /* End offset that we can CPU map */ /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 63d568512db5..d3f222fa6356 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -95,7 +95,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct resource stolen[2] = {*dsm, *dsm}; struct resource ggtt_res; - u64 ggtt_start; + resource_size_t ggtt_start; ggtt_start = I915_READ(PGTBL_CTL); if (IS_GEN4(dev_priv)) @@ -174,12 +174,12 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - dma_addr_t stolen_top = dev_priv->dsm.end + 1; + resource_size_t stolen_top = dev_priv->dsm.end + 1; if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { *base = 0; @@ -208,7 +208,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -240,7 +240,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -266,7 +266,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -298,10 +298,10 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - dma_addr_t stolen_top; + resource_size_t stolen_top; if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { *base = 0; @@ -325,9 +325,9 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { - dma_addr_t reserved_base, stolen_top; - u32 reserved_total, reserved_size; - u32 stolen_usable_start; + resource_size_t reserved_base, stolen_top; + resource_size_t reserved_total, reserved_size; + resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -427,14 +427,13 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, - u32 offset, u32 size) + resource_size_t offset, resource_size_t size) { struct drm_i915_private *dev_priv = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows_t(resource_size_t, offset, size, - resource_size(&dev_priv->dsm))); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -532,7 +531,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size) { struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; @@ -565,9 +565,9 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size) + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj; @@ -580,8 +580,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%x, gtt_offset=%x, size=%x\n", - stolen_offset, gtt_offset, size); + DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", + &stolen_offset, >t_offset, &size); /* KISS and expect everything to be page-aligned */ if (WARN_ON(size == 0) || diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 57dcf8e1ff30..18779c6eb4bf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7033,9 +7033,9 @@ static void cherryview_check_pctx(struct drm_i915_private *dev_priv) static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) { - unsigned long pctx_paddr, paddr; + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32*1024; u32 pcbr; - int pctx_size = 32*1024; pcbr = I915_READ(VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { @@ -7053,14 +7053,14 @@ static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *pctx; - unsigned long pctx_paddr; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24*1024; u32 pcbr; - int pctx_size = 24*1024; pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ - int pcbr_offset; + resource_size_t pcbr_offset; pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 8c577ca38e2e..2324c84a25c0 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -6,7 +6,7 @@ void intel_gtt_get(u64 *gtt_total, phys_addr_t *mappable_base, - u64 *mappable_end); + resource_size_t *mappable_end); int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, struct agp_bridge_data *bridge); From 0132a1a5d44d2cd32a249dbe999a88c2134a6bd1 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 12 Dec 2017 12:38:37 +0100 Subject: [PATCH 66/87] docs: fix, intel_guc_loader.c has been moved to intel_guc_fw.c With commit d9e2e0143c the 'GuC-specific firmware loader' doc section was removed from intel_guc_loader.c without a replacement. So lets remove it from the Kernel-doc:: .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c :doc: GuC-specific firmware loader With commit e8668bbcb0 intel_guc_loader.c was renamed to to intel_guc_fw.c and to name just one, intel_guc_init_hw() was renamed to intel_guc_fw_upload(). Since we get errors in the Sphinx build like: - Error: Cannot open file ./drivers/gpu/drm/i915/intel_guc_loader.c Change the kernel-doc directive from intel_guc_loader.c to intel_guc_fw.c Signed-off-by: Markus Heiser [danvet: Rebase onto the partial fix 006c23327f8d ("documentation/gpu/i915: fix docs build error after file rename")] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1513078717-12373-1-git-send-email-markus.heiser@darmarit.de --- Documentation/gpu/i915.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 84021142a8f3..41dc881b00dc 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,9 +341,6 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c - :doc: GuC-specific firmware loader - .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: From 3ef98f50ca7cef5cc9b9bf89cd46830afbf4df68 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Dec 2017 20:40:40 +0000 Subject: [PATCH 67/87] drm/i915: Print an error message for a reset failure on gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since on gen2, we do not universally have a GPU reset implementation, we fail i915_reset() at intel_has_gpu_reset(). However, this is also intentionally disabled for CI testing and so it only has a debug message. Promote that debug message to a user-facing error message that should explain why their machine became unusable following the GPU hang. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171211204040.22858-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 54a8fca7e7b2..721ccce1832f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1903,7 +1903,10 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) } if (!intel_has_gpu_reset(i915)) { - DRM_DEBUG_DRIVER("GPU reset disabled\n"); + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } From 1875fe7ba15020a0d45856d16c17e57ebcbc0bec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Dec 2017 11:35:32 +0000 Subject: [PATCH 68/87] drm/i915: Downgrade misleading "Memory usable" message It never meant what it said, as it was always the total size of the Global GTT and not a limit upon memory usage. Originally it served as a quick guide to the largest batch that could be submitted by userspace, an approximation to its maximum RSS, but was phrased badly. Today with the 48b ppgtt, it is even more meaningless. Replace with a more specific debug message; those wanting to know how much "video ram" they have should consult the userspace libraries for the relevant approximation. v2: Rebase Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171212113532.22574-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a7fe05666f72..a0579b0c8581 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3483,10 +3483,9 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) } /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_INFO("Memory usable by graphics device = %lluM\n", - ggtt->base.total >> 20); + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %lluM\n", + DRM_DEBUG_DRIVER("DSM size = %lluM\n", (u64)resource_size(&intel_graphics_stolen_res) >> 20); if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); From ee42c00e1cb0a421fcf6d502d1ab919e6459ae48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Dec 2017 19:41:34 +0000 Subject: [PATCH 69/87] drm/i915: Bump timeout for wait_for_engines() Extract the timeout we use in i915_gem_idle_work_handler() and reuse it for wait_for_engines() in i915_gem_wait_for_idle(). It too has the same problem in sometimes having to wait for an extended period before the HW settles, so make use of the same timeout. References: 5427f207852d ("drm/i915: Bump wait-times for the final CS interrupt before parking") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211194135.27095-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61196ff93901..7762eabd31c1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1586,6 +1586,8 @@ struct drm_i915_error_state_buf { loff_t pos; }; +#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ + #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e89496aec857..55e4475fabe0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3333,7 +3333,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - end = ktime_add_ms(ktime_get(), 200); + end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); do { if (new_requests_since_last_retire(dev_priv)) return; @@ -3533,7 +3533,7 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) static int wait_for_engines(struct drm_i915_private *i915) { - if (wait_for(intel_engines_are_idle(i915), 50)) { + if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { DRM_ERROR("Failed to idle engines, declaring wedged!\n"); i915_gem_set_wedged(i915); return -EIO; From 59e4b19d622ab22d138862ba287859e3aacd5478 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Dec 2017 19:41:35 +0000 Subject: [PATCH 70/87] drm/i915: Dump the engine state before declaring wedged from wait_for_engines() If wait_for_engines() fails and we resort to declaring the HW wedged, dump the engine state for debugging. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171211194135.27095-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 55e4475fabe0..8c92fcd6f805 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3534,7 +3534,18 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) static int wait_for_engines(struct drm_i915_private *i915) { if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { - DRM_ERROR("Failed to idle engines, declaring wedged!\n"); + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, + "%s", engine->name); + } + i915_gem_set_wedged(i915); return -EIO; } From 7d622351c94172a42bfe9b13bdb0fdc2be90ed3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 09:48:02 +0000 Subject: [PATCH 71/87] drm/i915/fence: Use rcu to defer freeing of irq_work It is illegal to perform an immediate free of the struct irq_work from inside the irq_work callback (as irq_work_run_list modifies work->flags after execution of the work->func()). As we use the irq_work to coordinate the freeing of the callback from two different softirq paths, we need to defer the kfree from inside our irq_work callback, for which we can use kfree_rcu. Fixes: 81c0ed21aa91 ("drm/i915/fence: Avoid del_timer_sync() from inside a timer") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171213094802.28243-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..ac236b88c99c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, From d7dc4131eb2a1efd670692b7fb578980d8f41762 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Dec 2017 13:21:48 +0000 Subject: [PATCH 72/87] drm/i915: Don't check #active_requests from i915_gem_wait_for_idle() i915_gem_wait_for_idle() is called from inside the shrinker, to ensure that we drain the last resources from the GPU in dire circumstances (OOM). As we may allocate whilst building a request, it is then possible to hit the shrinker with a request under construction, and so we must account for the incomplete request whilst waiting. In particular, we preincrement (in reserve_engine) the i915->gt.active_requests counter and mark the GPU as busy, therefore we can not use that counter for shortcircuiting the wait-for-idle. [ 950.859024] GEM_BUG_ON(i915->gt.active_requests) [ 950.859041] WARNING: CPU: 2 PID: 2178 at drivers/gpu/drm/i915/i915_gem.c:3615 i915_gem_wait_for_idle.part.56+0x166/0x4e0 [ 950.859041] Modules linked in: ccm tun fuse nf_conntrack_netbios_ns nf_conntrack_broadcast ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_mangle iptable_security iptable_raw arc4 iwldvm mac80211 snd_hda_codec_hdmi snd_hda_codec_idt snd_hda_codec_generic snd_hda_intel snd_hda_codec btusb snd_hda_core btrtl btbcm iwlwifi snd_hwdep btintel bluetooth snd_seq snd_seq_device snd_pcm ecdh_generic x86_pkg_temp_thermal tpm_infineon coretemp tpm_tis crc32_pclmul wmi_bmof crc32c_intel iTCO_wdt hp_wmi snd_timer iTCO_vendor_support sparse_keymap tpm_tis_core mei_me cfg80211 [ 950.859082] snd joydev tpm mei rfkill pcspkr wmi soundcore lpc_ich hp_accel lis3lv02d input_polldev binfmt_misc e1000e ptp serio_raw pps_core [ 950.859094] CPU: 2 PID: 2178 Comm: gem_exec_nop Tainted: G U 4.15.0-rc2+ #900 [ 950.859102] Hardware name: Hewlett-Packard HP ProBook 6360b/1620, BIOS 68SCF Ver. B.42 12/29/2010 [ 950.859107] task: c5119cb4 task.stack: f3ccb8d8 [ 950.859112] EIP: i915_gem_wait_for_idle.part.56+0x166/0x4e0 [ 950.859113] EFLAGS: 00010296 CPU: 2 [ 950.859114] EAX: 00000024 EBX: f36c1888 ECX: f777a044 EDX: 00000007 [ 950.859115] ESI: f36c1888 EDI: edd53958 EBP: edd53970 ESP: edd53938 [ 950.859116] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 950.859117] CR0: 80050033 CR2: b7f39000 CR3: 2f2b3000 CR4: 000406d0 [ 950.859118] Call Trace: [ 950.859125] ? drm_printk+0x70/0x70 [ 950.859129] i915_gem_wait_for_idle+0x18/0x30 [ 950.859133] i915_gem_shrink+0x360/0x410 [ 950.859138] ? vmpressure+0xa8/0xf0 [ 950.859142] ? ktime_get+0x4a/0x100 [ 950.859147] i915_gem_shrink_all+0x21/0x40 [ 950.859151] i915_gem_shrinker_oom+0x23/0x130 [ 950.859156] notifier_call_chain+0x4e/0x70 [ 950.859160] __blocking_notifier_call_chain+0x2f/0x60 [ 950.859164] blocking_notifier_call_chain+0x11/0x20 [ 950.859169] out_of_memory+0x207/0x280 [ 950.859174] __alloc_pages_nodemask+0xd47/0xe60 [ 950.859179] new_slab+0x32d/0x450 [ 950.859183] ___slab_alloc.constprop.81+0x358/0x4e0 [ 950.859189] ? i915_sw_fence_await_dma_fence+0x53/0x160 [ 950.859193] ? __slab_free+0x1fe/0x310 [ 950.859197] ? native_sched_clock+0x1e/0xc0 [ 950.859201] ? i915_gem_request_alloc+0xcf/0x510 [ 950.859205] ? sched_clock+0x9/0x10 [ 950.859209] __slab_alloc.constprop.80+0x29/0x40 [ 950.859212] ? __slab_alloc.constprop.80+0x29/0x40 [ 950.859216] kmem_cache_alloc_trace+0x160/0x1a0 [ 950.859220] ? i915_sw_fence_await_dma_fence+0x53/0x160 [ 950.859224] i915_sw_fence_await_dma_fence+0x53/0x160 [ 950.859229] i915_gem_request_await_dma_fence+0x1eb/0x390 [ 950.859233] i915_gem_request_await_object+0xee/0x230 [ 950.859239] i915_gem_do_execbuffer+0xc16/0x1200 [ 950.859246] ? irqtime_account_irq+0x3e/0xc0 [ 950.859251] ? irq_exit+0x4f/0xb0 [ 950.859257] ? smp_apic_timer_interrupt+0x5f/0x110 [ 950.859261] ? apic_timer_interrupt+0x35/0x3c [ 950.859266] i915_gem_execbuffer2_ioctl+0x212/0x440 [ 950.859270] ? apic_timer_interrupt+0x35/0x3c [ 950.859274] ? i915_gem_do_execbuffer+0x1200/0x1200 [ 950.859279] ? insn_get_seg_base+0x1b/0x50 [ 950.859283] ? i915_gem_do_execbuffer+0x1200/0x1200 [ 950.859287] drm_ioctl_kernel+0x51/0xa0 [ 950.859291] drm_ioctl+0x2a3/0x350 [ 950.859294] ? i915_gem_do_execbuffer+0x1200/0x1200 [ 950.859300] ? sched_clock+0x9/0x10 [ 950.859303] ? drm_getunique+0x70/0x70 [ 950.859308] do_vfs_ioctl+0x7d/0x640 [ 950.859311] ? native_sched_clock+0x1e/0xc0 [ 950.859315] ? sched_clock+0x9/0x10 [ 950.859319] ? sched_clock_cpu+0x13/0x120 [ 950.859323] SyS_ioctl+0x4e/0x80 [ 950.859326] do_fast_syscall_32+0x75/0x250 [ 950.859331] ? irq_exit+0x4f/0xb0 [ 950.859334] entry_SYSENTER_32+0x47/0x71 [ 950.859338] EIP: 0xb7f81d11 [ 950.859339] EFLAGS: 00000296 CPU: 2 [ 950.859340] EAX: ffffffda EBX: 00000003 ECX: 40406469 EDX: bfde4c20 [ 950.859340] ESI: 00000003 EDI: 40406469 EBP: 00000003 ESP: bfde4b38 [ 950.859341] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b [ 950.859343] Code: e8 30 60 01 00 83 c4 10 83 c3 04 39 f3 75 e0 8b 45 d8 8b 80 14 37 00 00 85 c0 74 13 68 dd 33 e4 c0 68 49 6f e3 c0 e8 4a 55 be ff <0f> ff 5e 5f b8 fe ff ff 3f bb 0a 00 00 00 e8 b7 14 c4 ff 8b 15 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171212132148.8124-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 -- drivers/gpu/drm/i915/intel_engine_cs.c | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8c92fcd6f805..8c3d801696b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3571,9 +3571,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); ret = wait_for_engines(i915); } else { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index aad353195f17..510e0bc3a377 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1513,10 +1513,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - if (READ_ONCE(dev_priv->gt.active_requests)) - return false; - - /* If the driver is wedged, HW state may be very inconsistent and + /* + * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. */ if (i915_terminally_wedged(&dev_priv->gpu_error)) From e30a7581bfc2d5619fed03ec7321c9e4ffb7e36f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Dec 2017 18:06:50 +0000 Subject: [PATCH 73/87] drm/i915: Mark up potential allocation paths within i915_sw_fence as might_sleep As kmalloc is allowed to block (if given the right flags), mark up the two i915_sw_fence routines that may call kmalloc as potential sleeping routines. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171212180652.22061-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index ac236b88c99c..3669f5eeb91e 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -303,6 +303,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, int pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (i915_sw_fence_done(signaler)) return 0; @@ -419,6 +420,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, int ret; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (dma_fence_is_signaled(dma)) return 0; @@ -465,6 +467,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, int ret = 0, pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (write) { struct dma_fence **shared; From 2abe2f844645402e5d47012a04839d1c5cbffd0d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Dec 2017 18:06:51 +0000 Subject: [PATCH 74/87] drm/i915: Allow fence allocations to fail If a fence allocation fails in a blocking context, we will sleep on the fence as a last resort. We can therefore allow ourselves to fail and sleep on the fence instead of triggering a system-wide oom. This allows us to throttle malicious clients that are consuming lots of system resources by capping the amount of memory used by fences. Testcase: igt/gem_shrink/execbufX Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171212180652.22061-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 6 +++--- drivers/gpu/drm/i915/i915_gem_request.h | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index f663cd919795..b9b53ac14176 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -167,7 +167,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_sw_fence_await_reservation(&clflush->wait, obj->resv, NULL, true, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); reservation_object_lock(obj->resv, NULL); reservation_object_add_excl_fence(obj->resv, &clflush->dma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 21ce374d9924..648e7536ff51 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -617,7 +617,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) if (prev) i915_sw_fence_await_sw_fence_gfp(&req->submit, &prev->submit, - GFP_KERNEL); + I915_FENCE_GFP); } /* diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index c28a4ceb016d..4d5e2b714382 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -782,7 +782,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -810,7 +810,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, await_dma_fence: ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -861,7 +861,7 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, else ret = i915_sw_fence_await_dma_fence(&req->submit, fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 26249f39de67..0d6d39f19506 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -204,6 +204,8 @@ struct drm_i915_gem_request { struct list_head client_link; }; +#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + extern const struct dma_fence_ops i915_fence_ops; static inline bool dma_fence_is_i915(const struct dma_fence *fence) From 31c70f97bec3107805df0f491485e7eacbc3a3ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Dec 2017 18:06:52 +0000 Subject: [PATCH 75/87] drm/i915: Ratelimit request allocation under oom If we fail to allocate a request, we can reap the outstanding requests and push them to the request's slab's freelist before trying again. This forces us to ratelimit malicious clients that tie up all of the system resources in requests, instead of causing a system-wide oom. Testcase: igt/gem_shrink/execbuf1 Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171212180652.22061-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 4d5e2b714382..59f023bb7015 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -677,10 +677,21 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * * Do not use kmem_cache_zalloc() here! */ - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto err_unreserve; + req = kmem_cache_alloc(dev_priv->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!req)) { + /* Ratelimit ourselves to prevent oom from malicious clients */ + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (ret) + goto err_unreserve; + + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto err_unreserve; + } } req->timeline = i915_gem_context_lookup_timeline(ctx, engine); From 6ca9a2beb54a385c8cc5e39a9d2ef49945896c64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 13:43:47 +0000 Subject: [PATCH 76/87] drm/i915: Unwind i915_gem_init() failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since Michal introduced new user controllable errors other than -EIO during i915_gem_init(), we need to actually unwind on the error path as we have to abort the module load (and we expect to do so cleanly!). As we now teardown key state and then mark the driver as wedged (on EIO), we have to be careful to not allow ourselves to resume and unwedge, thus attempting to use the uninitialised driver. v2: Try not to free driver state for the suppressed EIO v3: Use load-fault-injection to test both error/recovery paths. References: 8620eb1dbbf2 ("drm/i915/uc: Don't use -EIO to report missing firmware") Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Joonas Lahtinen Cc: Sagar Arun Kamble Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171213134347.4608-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 80 +++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8c3d801696b7..13fa26238e89 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4865,7 +4865,8 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); - /* As we didn't flush the kernel context before suspend, we cannot + /* + * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ @@ -4886,8 +4887,10 @@ void i915_gem_resume(struct drm_i915_private *i915) return; err_wedged: - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - i915_gem_set_wedged(i915); + if (!i915_terminally_wedged(&i915->gpu_error)) { + DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + i915_gem_set_wedged(i915); + } goto out_unlock; } @@ -5170,22 +5173,28 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = i915_gem_init_ggtt(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; + } ret = i915_gem_contexts_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_ggtt; + } ret = intel_engines_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_context; + } intel_init_gt_powersave(dev_priv); ret = i915_gem_init_hw(dev_priv); if (ret) - goto out_unlock; + goto err_pm; /* * Despite its name intel_init_clock_gating applies both display @@ -5199,9 +5208,53 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_init_clock_gating(dev_priv); ret = __intel_engines_record_defaults(dev_priv); -out_unlock: + if (ret) + goto err_init_hw; + + if (i915_inject_load_failure()) { + ret = -ENODEV; + goto err_init_hw; + } + + if (i915_inject_load_failure()) { + ret = -EIO; + goto err_init_hw; + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + return 0; + + /* + * Unwinding is complicated by that we want to handle -EIO to mean + * disable GPU submission but keep KMS alive. We want to mark the + * HW as irrevisibly wedged, but keep enough state around that the + * driver doesn't explode during runtime. + */ +err_init_hw: + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + i915_gem_contexts_lost(dev_priv); + intel_uc_fini_hw(dev_priv); +err_pm: + if (ret != -EIO) { + intel_cleanup_gt_powersave(dev_priv); + i915_gem_cleanup_engines(dev_priv); + } +err_context: + if (ret != -EIO) + i915_gem_contexts_fini(dev_priv); +err_ggtt: +err_unlock: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + if (ret != -EIO) + i915_gem_cleanup_userptr(dev_priv); + if (ret == -EIO) { - /* Allow engine initialisation to fail by marking the GPU as + /* + * Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ @@ -5211,9 +5264,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } ret = 0; } - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_drain_freed_objects(dev_priv); return ret; } From 1bbbca0db722d137ed426368bff42b17808ad11e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:46 +0100 Subject: [PATCH 77/87] drm/i915/guc: Move shared data allocation away from submission path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need shared data for actions (e.g. guc suspend/resume), and we're using those with GuC submission disabled. Let's introduce intel_guc_init and move shared data alloc there. This fixes GPF during module unload with HuC, but without GuC submission: BUG: unable to handle kernel NULL pointer dereference at 000000005aee7809 IP: intel_guc_suspend+0x34/0x140 [i915] PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP Modules linked in: i915(O-) netconsole x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me i2c_i801 mei prime_numbers [last unloaded: i915] CPU: 2 PID: 2794 Comm: rmmod Tainted: G U W O 4.15.0-rc2+ #297 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 task: 0000000055945c61 task.stack: 00000000264ccb43 RIP: 0010:intel_guc_suspend+0x34/0x140 [i915] RSP: 0018:ffffc90000483df8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff880829180000 RCX: 0000000000000000 RDX: 0000000000000006 RSI: ffff880844c2c938 RDI: ffff880844c2c000 RBP: ffff880829180000 R08: 00000000a29c58c1 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa040ba40 R13: ffffffffa040bab0 R14: ffff88084a195060 R15: 000055df3ef357a0 FS: 00007ff43c043740(0000) GS:ffff88084e200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000f9 CR3: 000000083f179005 CR4: 00000000003606e0 Call Trace: i915_gem_suspend+0x9d/0x130 [i915] ? i915_driver_unload+0x68/0x180 [i915] i915_driver_unload+0x70/0x180 [i915] i915_pci_remove+0x15/0x20 [i915] pci_device_remove+0x36/0xb0 device_release_driver_internal+0x15f/0x220 driver_detach+0x3a/0x80 bus_remove_driver+0x58/0xd0 pci_unregister_driver+0x29/0x90 SyS_delete_module+0x150/0x1e0 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x7ff43b51b5c7 RSP: 002b:00007ffe6825a758 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007ff43b51b5c7 RDX: 000000000000000a RSI: 0000000000000800 RDI: 000055df3ef35808 RBP: 0000000000000000 R08: 00007ffe682596d1 R09: 0000000000000000 R10: 00007ff43b594880 R11: 0000000000000206 R12: 000055df3ef357a0 R13: 00007ffe68259740 R14: 000055df3ef35260 R15: 000055df3ef357a0 Code: 00 00 02 74 03 31 c0 c3 53 48 89 fb 48 83 ec 10 e8 52 0f f8 ff 48 b8 01 05 00 00 02 00 00 00 48 89 44 24 04 48 8b 83 00 12 00 00 80 f9 00 00 00 01 0f 84 a7 00 00 00 f6 80 98 00 00 00 01 0f RIP: intel_guc_suspend+0x34/0x140 [i915] RSP: ffffc90000483df8 CR2: 00000000000000f9 ---[ end trace 23a192a61d937a3e ]--- Fixes: b8e5eb960b28 ("drm/i915/guc: Allocate separate shared data object for GuC communication") Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-1-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc.c | 51 +++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 2 + drivers/gpu/drm/i915/intel_guc_submission.c | 37 +-------------- drivers/gpu/drm/i915/intel_uc.c | 10 ++-- 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 177ee69ca9b1..92ed22f38fc4 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -69,6 +69,57 @@ void intel_guc_init_early(struct intel_guc *guc) guc->notify = gen8_guc_raise_irq; } +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; + guc->shared_data_vaddr = vaddr; + + return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_gem_object_unpin_map(guc->shared_data->obj); + i915_vma_unpin_and_release(&guc->shared_data); +} + +int intel_guc_init(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + ret = guc_shared_data_create(guc); + if (ret) + return ret; + GEM_BUG_ON(!guc->shared_data); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(dev_priv); + + return 0; +} + +void intel_guc_fini(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + i915_ggtt_disable_guc(dev_priv); + guc_shared_data_destroy(guc); +} + static u32 get_gt_type(struct drm_i915_private *dev_priv) { /* XXX: GT type based on PCI device ID? field seems unused by fw */ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 59856726d2bc..81659e223e11 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); +int intel_guc_init(struct intel_guc *guc); +void intel_guc_fini(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 912ff143d531..c020560c395e 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -447,33 +447,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc, memset(desc, 0, sizeof(*desc)); } -static int guc_shared_data_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma); - return PTR_ERR(vaddr); - } - - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; -} - -static void guc_shared_data_destroy(struct intel_guc *guc) -{ - i915_gem_object_unpin_map(guc->shared_data->obj); - i915_vma_unpin_and_release(&guc->shared_data); -} - /* Construct a Work Item and append it to the GuC's Work Queue */ static void guc_wq_item_append(struct intel_guc_client *client, u32 target_engine, u32 context_desc, @@ -1279,14 +1252,9 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = guc_shared_data_create(guc); - if (ret) - goto err_stage_desc_pool; - GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(guc); if (ret < 0) - goto err_shared_data; + goto err_stage_desc_pool; ret = guc_preempt_work_create(guc); if (ret) @@ -1304,8 +1272,6 @@ int intel_guc_submission_init(struct intel_guc *guc) guc_preempt_work_destroy(guc); err_log: intel_guc_log_destroy(guc); -err_shared_data: - guc_shared_data_destroy(guc); err_stage_desc_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -1316,7 +1282,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_ads_destroy(guc); guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); - guc_shared_data_destroy(guc); guc_stage_desc_pool_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 461047c86e0d..3040a0e00142 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -24,6 +24,7 @@ #include "intel_uc.h" #include "intel_guc_submission.h" +#include "intel_guc.h" #include "i915_drv.h" /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -204,8 +205,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(dev_priv); + ret = intel_guc_init(guc); + if (ret) + goto err_out; if (USES_GUC_SUBMISSION(dev_priv)) { /* @@ -298,7 +300,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (USES_GUC_SUBMISSION(dev_priv)) intel_guc_submission_fini(guc); err_guc: - i915_ggtt_disable_guc(dev_priv); + intel_guc_fini(guc); err_out: /* * Note that there is no fallback as either user explicitly asked for @@ -330,5 +332,5 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) intel_guc_submission_fini(guc); } - i915_ggtt_disable_guc(dev_priv); + intel_guc_fini(guc); } From 3176ff49bc3e56871ec9f338cac759dacd0135e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:47 +0100 Subject: [PATCH 78/87] drm/i915/guc: Move GuC workqueue allocations outside of the mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets rid of the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc2-CI-Patchwork_7428+ #1 Not tainted ------------------------------------------------------ debugfs_test/1351 is trying to acquire lock: (&dev->struct_mutex){+.+.}, at: [<000000009d90d1a3>] i915_mutex_lock_interruptible+0x47/0x130 [i915] but task is already holding lock: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #6 (&mm->mmap_sem){++++}: __might_fault+0x63/0x90 _copy_to_user+0x1e/0x70 filldir+0x8c/0xf0 dcache_readdir+0xeb/0x160 iterate_dir+0xe6/0x150 SyS_getdents+0xa0/0x130 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #5 (&sb->s_type->i_mutex_key#5){++++}: lockref_get+0x9/0x20 -> #4 ((completion)&req.done){+.+.}: wait_for_common+0x54/0x210 devtmpfs_create_node+0x130/0x150 device_add+0x5ad/0x5e0 device_create_groups_vargs+0xd4/0xe0 device_create+0x35/0x40 msr_device_create+0x22/0x40 cpuhp_invoke_callback+0xc5/0xbf0 cpuhp_thread_fun+0x167/0x210 smpboot_thread_fn+0x17f/0x270 kthread+0x173/0x1b0 ret_from_fork+0x24/0x30 -> #3 (cpuhp_state-up){+.+.}: cpuhp_issue_call+0x132/0x1c0 __cpuhp_setup_state_cpuslocked+0x12f/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_writeback_init+0x3a/0x5c start_kernel+0x393/0x3e2 secondary_startup_64+0xa5/0xb0 -> #2 (cpuhp_state_mutex){+.+.}: __mutex_lock+0x81/0x9b0 __cpuhp_setup_state_cpuslocked+0x4b/0x2a0 __cpuhp_setup_state+0x3a/0x50 page_alloc_init+0x1f/0x26 start_kernel+0x139/0x3e2 secondary_startup_64+0xa5/0xb0 -> #1 (cpu_hotplug_lock.rw_sem){++++}: cpus_read_lock+0x34/0xa0 apply_workqueue_attrs+0xd/0x40 __alloc_workqueue_key+0x2c7/0x4e1 intel_guc_submission_init+0x10c/0x650 [i915] intel_uc_init_hw+0x29e/0x460 [i915] i915_gem_init_hw+0xca/0x290 [i915] i915_gem_init+0x115/0x3a0 [i915] i915_driver_load+0x9a8/0x16c0 [i915] i915_pci_probe+0x2e/0x90 [i915] pci_device_probe+0x9c/0x120 driver_probe_device+0x2a3/0x480 __driver_attach+0xd9/0xe0 bus_for_each_dev+0x57/0x90 bus_add_driver+0x168/0x260 driver_register+0x52/0xc0 do_one_initcall+0x39/0x150 do_init_module+0x56/0x1ef load_module+0x231c/0x2d70 SyS_finit_module+0xa5/0xe0 entry_SYSCALL_64_fastpath+0x1c/0x89 -> #0 (&dev->struct_mutex){+.+.}: lock_acquire+0xaf/0x200 __mutex_lock+0x81/0x9b0 i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#5 --> &mm->mmap_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&sb->s_type->i_mutex_key#5); lock(&mm->mmap_sem); lock(&dev->struct_mutex); *** DEADLOCK *** 1 lock held by debugfs_test/1351: #0: (&mm->mmap_sem){++++}, at: [<000000005df01c1e>] __do_page_fault+0x106/0x560 stack backtrace: CPU: 2 PID: 1351 Comm: debugfs_test Not tainted 4.15.0-rc2-CI-Patchwork_7428+ #1 Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0057.2017.0119.1758 01/19/2017 Call Trace: dump_stack+0x5f/0x86 print_circular_bug+0x230/0x3b0 check_prev_add+0x439/0x7b0 ? lockdep_init_map_crosslock+0x20/0x20 ? unwind_get_return_address+0x16/0x30 ? __lock_acquire+0x1385/0x15a0 __lock_acquire+0x1385/0x15a0 lock_acquire+0xaf/0x200 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] __mutex_lock+0x81/0x9b0 ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] ? i915_mutex_lock_interruptible+0x47/0x130 [i915] i915_mutex_lock_interruptible+0x47/0x130 [i915] ? __pm_runtime_resume+0x4f/0x80 i915_gem_fault+0x201/0x760 [i915] __do_fault+0x15/0x70 __handle_mm_fault+0x85b/0xe40 handle_mm_fault+0x14f/0x2f0 __do_page_fault+0x2d1/0x560 page_fault+0x22/0x30 RIP: 0033:0x7f98d6f49116 RSP: 002b:00007ffd6ffc3278 EFLAGS: 00010283 RAX: 00007f98d39a2bc0 RBX: 0000000000000000 RCX: 0000000000001680 RDX: 0000000000001680 RSI: 00007ffd6ffc3400 RDI: 00007f98d39a2bc0 RBP: 00007ffd6ffc33a0 R08: 0000000000000000 R09: 00000000000005a0 R10: 000055e847c2a830 R11: 0000000000000002 R12: 0000000000000001 R13: 000055e847c1d040 R14: 00007ffd6ffc3400 R15: 00007f98d6752ba0 v2: Init preempt_work unconditionally (Chris) v3: Mention that we need the enable_guc=1 for lockdep splat (Chris) Testcase: igt/debugfs_test/read_all_entries # with i915.enable_guc=1 Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-2-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 4 ++ drivers/gpu/drm/i915/intel_guc.c | 57 +++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 2 + drivers/gpu/drm/i915/intel_guc_log.c | 23 ------- drivers/gpu/drm/i915/intel_guc_submission.c | 70 +++++---------------- drivers/gpu/drm/i915/intel_guc_submission.h | 2 + drivers/gpu/drm/i915/intel_uc.c | 26 ++++++++ drivers/gpu/drm/i915/intel_uc.h | 2 + 9 files changed, 110 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 721ccce1832f..285c8b238bff 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -621,6 +621,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) i915_gem_contexts_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uc_fini_wq(dev_priv); i915_gem_cleanup_userptr(dev_priv); i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 13fa26238e89..2c13e3a4f45a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5163,6 +5163,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = intel_uc_init_wq(dev_priv); + if (ret) + return ret; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 92ed22f38fc4..3c6bf5a34c3c 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -69,6 +69,63 @@ void intel_guc_init_early(struct intel_guc *guc) guc->notify = gen8_guc_raise_irq; } +int intel_guc_init_wq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. This way the handling of work + * item can be kept same between system suspend & rpm suspend. + */ + guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.runtime.flush_wq) + return -ENOMEM; + + /* + * Even though both sending GuC action, and adding a new workitem to + * GuC workqueue are serialized (each with its own locking), since + * we're using mutliple engines, it's possible that we're going to + * issue a preempt request with two (or more - each for different + * engine) workitems in GuC queue. In this situation, GuC may submit + * all of them, which will make us very confused. + * Our preemption contexts may even already be complete - before we + * even had the chance to sent the preempt action to GuC!. Rather + * than introducing yet another lock, we can just use ordered workqueue + * to make sure we're always sending a single preemption request with a + * single workitem. + */ + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) { + guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", + WQ_HIGHPRI); + if (!guc->preempt_wq) { + destroy_workqueue(guc->log.runtime.flush_wq); + return -ENOMEM; + } + } + + return 0; +} + +void intel_guc_fini_wq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) + destroy_workqueue(guc->preempt_wq); + + destroy_workqueue(guc->log.runtime.flush_wq); +} + static int guc_shared_data_create(struct intel_guc *guc) { struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 81659e223e11..52856a97477d 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -119,6 +119,8 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); +int intel_guc_init_wq(struct intel_guc *guc); +void intel_guc_fini_wq(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 1a2c5eed9929..eaedd63e3819 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -411,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc) guc->log.runtime.relay_chan = guc_log_relay_chan; INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. - * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - ret = -ENOMEM; - goto err_relaychan; - } - return 0; -err_relaychan: - relay_close(guc->log.runtime.relay_chan); err_vaddr: i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; @@ -450,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc) if (!guc_log_has_runtime(guc)) return; - destroy_workqueue(guc->log.runtime.flush_wq); relay_close(guc->log.runtime.relay_chan); i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index c020560c395e..8f4b274d66a7 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1187,57 +1187,15 @@ static void guc_ads_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->ads_vma); } -static int guc_preempt_work_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * Even though both sending GuC action, and adding a new workitem to - * GuC workqueue are serialized (each with its own locking), since - * we're using mutliple engines, it's possible that we're going to - * issue a preempt request with two (or more - each for different - * engine) workitems in GuC queue. In this situation, GuC may submit - * all of them, which will make us very confused. - * Our preemption contexts may even already be complete - before we - * even had the chance to sent the preempt action to GuC!. Rather - * than introducing yet another lock, we can just use ordered workqueue - * to make sure we're always sending a single preemption request with a - * single workitem. - */ - guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", - WQ_HIGHPRI); - if (!guc->preempt_wq) - return -ENOMEM; - - for_each_engine(engine, dev_priv, id) { - guc->preempt_work[id].engine = engine; - INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); - } - - return 0; -} - -static void guc_preempt_work_destroy(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - cancel_work_sync(&guc->preempt_work[id].work); - - destroy_workqueue(guc->preempt_wq); - guc->preempt_wq = NULL; -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. */ int intel_guc_submission_init(struct intel_guc *guc) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; int ret; if (guc->stage_desc_pool) @@ -1256,20 +1214,18 @@ int intel_guc_submission_init(struct intel_guc *guc) if (ret < 0) goto err_stage_desc_pool; - ret = guc_preempt_work_create(guc); - if (ret) - goto err_log; - GEM_BUG_ON(!guc->preempt_wq); - ret = guc_ads_create(guc); if (ret < 0) - goto err_wq; + goto err_log; GEM_BUG_ON(!guc->ads_vma); + for_each_engine(engine, dev_priv, id) { + guc->preempt_work[id].engine = engine; + INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); + } + return 0; -err_wq: - guc_preempt_work_destroy(guc); err_log: intel_guc_log_destroy(guc); err_stage_desc_pool: @@ -1279,8 +1235,14 @@ int intel_guc_submission_init(struct intel_guc *guc) void intel_guc_submission_fini(struct intel_guc *guc) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + cancel_work_sync(&guc->preempt_work[id].work); + guc_ads_destroy(guc); - guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); guc_stage_desc_pool_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index 021fe85c8f71..fb081cefef93 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -77,5 +77,7 @@ int intel_guc_submission_init(struct intel_guc *guc); int intel_guc_submission_enable(struct intel_guc *guc); void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); +int intel_guc_preempt_work_create(struct intel_guc *guc); +void intel_guc_preempt_work_destroy(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 3040a0e00142..785850838a44 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -188,6 +188,32 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } +int intel_uc_init_wq(struct drm_i915_private *dev_priv) +{ + int ret; + + if (!USES_GUC(dev_priv)) + return 0; + + ret = intel_guc_init_wq(&dev_priv->guc); + if (ret) { + DRM_ERROR("Couldn't allocate workqueues for GuC\n"); + return ret; + } + + return 0; +} + +void intel_uc_fini_wq(struct drm_i915_private *dev_priv) +{ + if (!USES_GUC(dev_priv)) + return; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + intel_guc_fini_wq(&dev_priv->guc); +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 7a59e2486e9e..53edfeaf56b0 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -33,6 +33,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); +int intel_uc_init_wq(struct drm_i915_private *dev_priv); +void intel_uc_fini_wq(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); From 61b5c1587dd82a8493d7838bb2f9289fd0903364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:48 +0100 Subject: [PATCH 79/87] drm/i915/guc: Extract guc_init from guc_init_hw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After GPU reset, GuC HW needs to be reinitialized (with FW reload). Unfortunately, we're doing some extra work there (mostly allocating stuff), work that can be moved to guc_init and called once at driver load time. As a side effect we're no longer hitting an assert in i915_ggtt_enable_guc on suspend/resume. v2: Do not duplicate disable_communication / reset_guc_interrupts v3: Add proper teardown after rebase References: 04f7b24eccdf ("drm/i915/guc: Assert that we switch between known ggtt->invalidate functions") Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Cc: Sagar Arun Kamble Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-3-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 8 +++- drivers/gpu/drm/i915/intel_uc.c | 81 +++++++++++++++++++++------------ drivers/gpu/drm/i915/intel_uc.h | 2 + 4 files changed, 61 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 285c8b238bff..ca9f4b2862eb 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -617,6 +617,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(dev_priv); + intel_uc_fini(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2c13e3a4f45a..4a7f5579a7a5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5196,10 +5196,14 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_init_gt_powersave(dev_priv); - ret = i915_gem_init_hw(dev_priv); + ret = intel_uc_init(dev_priv); if (ret) goto err_pm; + ret = i915_gem_init_hw(dev_priv); + if (ret) + goto err_uc_init; + /* * Despite its name intel_init_clock_gating applies both display * clock gating workarounds; GT mmio workarounds and the occasional @@ -5240,6 +5244,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); i915_gem_contexts_lost(dev_priv); intel_uc_fini_hw(dev_priv); +err_uc_init: + intel_uc_fini(dev_priv); err_pm: if (ret != -EIO) { intel_cleanup_gt_powersave(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 785850838a44..907deac6e3fa 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -214,6 +214,51 @@ void intel_uc_fini_wq(struct drm_i915_private *dev_priv) intel_guc_fini_wq(&dev_priv->guc); } +int intel_uc_init(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + int ret; + + if (!USES_GUC(dev_priv)) + return 0; + + if (!HAS_GUC(dev_priv)) + return -ENODEV; + + ret = intel_guc_init(guc); + if (ret) + return ret; + + if (USES_GUC_SUBMISSION(dev_priv)) { + /* + * This is stuff we need to have available at fw load time + * if we are planning to enable submission later + */ + ret = intel_guc_submission_init(guc); + if (ret) { + intel_guc_fini(guc); + return ret; + } + } + + return 0; +} + +void intel_uc_fini(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + + if (!USES_GUC(dev_priv)) + return; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + if (USES_GUC_SUBMISSION(dev_priv)) + intel_guc_submission_fini(guc); + + intel_guc_fini(guc); +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -223,28 +268,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return 0; - if (!HAS_GUC(dev_priv)) { - ret = -ENODEV; - goto err_out; - } + GEM_BUG_ON(!HAS_GUC(dev_priv)); guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); - ret = intel_guc_init(guc); - if (ret) - goto err_out; - - if (USES_GUC_SUBMISSION(dev_priv)) { - /* - * This is stuff we need to have available at fw load time - * if we are planning to enable submission later - */ - ret = intel_guc_submission_init(guc); - if (ret) - goto err_guc; - } - /* init WOPCM */ I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); I915_WRITE(DMA_GUC_WOPCM_OFFSET, @@ -264,12 +292,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) */ ret = __intel_uc_reset_hw(dev_priv); if (ret) - goto err_submission; + goto err_out; if (USES_HUC(dev_priv)) { ret = intel_huc_init_hw(huc); if (ret) - goto err_submission; + goto err_out; } intel_guc_init_params(guc); @@ -322,11 +350,6 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) guc_disable_communication(guc); err_log_capture: guc_capture_load_err_log(guc); -err_submission: - if (USES_GUC_SUBMISSION(dev_priv)) - intel_guc_submission_fini(guc); -err_guc: - intel_guc_fini(guc); err_out: /* * Note that there is no fallback as either user explicitly asked for @@ -348,15 +371,13 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return; + GEM_BUG_ON(!HAS_GUC(dev_priv)); + if (USES_GUC_SUBMISSION(dev_priv)) intel_guc_submission_disable(guc); guc_disable_communication(guc); - if (USES_GUC_SUBMISSION(dev_priv)) { + if (USES_GUC_SUBMISSION(dev_priv)) gen9_disable_guc_interrupts(dev_priv); - intel_guc_submission_fini(guc); - } - - intel_guc_fini(guc); } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index 53edfeaf56b0..8a7249722ef1 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -37,6 +37,8 @@ int intel_uc_init_wq(struct drm_i915_private *dev_priv); void intel_uc_fini_wq(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); +int intel_uc_init(struct drm_i915_private *dev_priv); +void intel_uc_fini(struct drm_i915_private *dev_priv); static inline bool intel_uc_is_using_guc(void) { From aeb950bd97e1f66a109430e05a3066e547bce916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:49 +0100 Subject: [PATCH 80/87] drm/i915/guc: Call invalidate after changing the vfunc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make this operation a bit cleaner, we should make sure that the HW can catch up by calling the new implementation right away. Note that currently we're only touching the vfunc at module load time (before GuC is even loaded), so this shouldn't cause any functional changes. Suggested-by: Chris Wilson Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-4-michal.winiarski@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0579b0c8581..c5f393870532 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3552,6 +3552,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); i915->ggtt.invalidate = guc_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_ggtt_disable_guc(struct drm_i915_private *i915) @@ -3560,6 +3562,8 @@ void i915_ggtt_disable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); i915->ggtt.invalidate = gen6_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) From 9192d4fb811e1e7d07c8fa1a576d2935826daa91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:50 +0100 Subject: [PATCH 81/87] drm/i915/guc: Extract doorbell creation from client allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full GPU reset causes GuC to be reset. This means that every time we're doing a reset, we need to talk to GuC and tell it about doorbells. Let's separate the communication part (create_doorbell) from our internal bookkeeping (reserve_doorbell) so that we can cleanly separate the initialization done at module load from reinitialization done at reset in the following patch. While I'm here, let's also add a proper (although slightly asymetric) cleanup that doesn't try to communicate with GuC after it's already gone, getting rid of "expected" warnings caused by GuC action failures on module unload. Note that I've also removed one of the tests (bitmap out of sync), since it doesn't make much sense anymore - bitmaps are now not expected to change during the lifetime of a client. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Cc: Michel Thierry Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-5-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc_submission.c | 151 ++++++-------------- drivers/gpu/drm/i915/selftests/intel_guc.c | 110 ++++++-------- 2 files changed, 88 insertions(+), 173 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8f4b274d66a7..c74e78b6ba41 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -88,7 +88,7 @@ static inline bool is_high_priority(struct intel_guc_client *client) client->priority == GUC_CLIENT_PRIORITY_HIGH); } -static int __reserve_doorbell(struct intel_guc_client *client) +static int reserve_doorbell(struct intel_guc_client *client) { unsigned long offset; unsigned long end; @@ -120,7 +120,7 @@ static int __reserve_doorbell(struct intel_guc_client *client) return 0; } -static void __unreserve_doorbell(struct intel_guc_client *client) +static void unreserve_doorbell(struct intel_guc_client *client) { GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); @@ -188,32 +188,21 @@ static bool has_doorbell(struct intel_guc_client *client) return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); } -static int __create_doorbell(struct intel_guc_client *client) +static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; - int err; doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_ENABLED; doorbell->cookie = 0; - - err = __guc_allocate_doorbell(client->guc, client->stage_id); - if (err) { - doorbell->db_status = GUC_DOORBELL_DISABLED; - DRM_ERROR("Couldn't create client %u doorbell: %d\n", - client->stage_id, err); - } - - return err; } -static int __destroy_doorbell(struct intel_guc_client *client) +static void __destroy_doorbell(struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(client->guc); struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; @@ -225,50 +214,42 @@ static int __destroy_doorbell(struct intel_guc_client *client) */ if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) WARN_ONCE(true, "Doorbell never became invalid after disable\n"); - - return __guc_deallocate_doorbell(client->guc, client->stage_id); } static int create_doorbell(struct intel_guc_client *client) { int ret; - ret = __reserve_doorbell(client); - if (ret) - return ret; - __update_doorbell_desc(client, client->doorbell_id); + __create_doorbell(client); - ret = __create_doorbell(client); - if (ret) - goto err; + ret = __guc_allocate_doorbell(client->guc, client->stage_id); + if (ret) { + __destroy_doorbell(client); + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + DRM_ERROR("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); + return ret; + } return 0; - -err: - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - return ret; } static int destroy_doorbell(struct intel_guc_client *client) { - int err; + int ret; GEM_BUG_ON(!has_doorbell(client)); - /* XXX: wait for any interrupts */ - /* XXX: wait for workqueue to drain */ - - err = __destroy_doorbell(client); - if (err) - return err; + __destroy_doorbell(client); + ret = __guc_deallocate_doorbell(client->guc, client->stage_id); + if (ret) + DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", + client->stage_id, ret); __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - - return 0; + return ret; } static unsigned long __select_cacheline(struct intel_guc *guc) @@ -839,73 +820,18 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) return false; } -/* - * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and - * reloaded the GuC FW) we can use this function to tell the GuC to reassign the - * doorbell to the rightful owner. - */ -static int __reset_doorbell(struct intel_guc_client *client, u16 db_id) +static int guc_clients_doorbell_init(struct intel_guc *guc) { - int err; - - __update_doorbell_desc(client, db_id); - err = __create_doorbell(client); - if (!err) - err = __destroy_doorbell(client); - - return err; -} - -/* - * Set up & tear down each unused doorbell in turn, to ensure that all doorbell - * HW is (re)initialised. For that end, we might have to borrow the first - * client. Also, tell GuC about all the doorbells in use by all clients. - * We do this because the KMD, the GuC and the doorbell HW can easily go out of - * sync (e.g. we can reset the GuC, but not the doorbel HW). - */ -static int guc_init_doorbell_hw(struct intel_guc *guc) -{ - struct intel_guc_client *client = guc->execbuf_client; - bool recreate_first_client = false; u16 db_id; int ret; - /* For unused doorbells, make sure they are disabled */ - for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { - if (doorbell_ok(guc, db_id)) - continue; - - if (has_doorbell(client)) { - /* Borrow execbuf_client (we will recreate it later) */ - destroy_doorbell(client); - recreate_first_client = true; - } - - ret = __reset_doorbell(client, db_id); - WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); - } - - if (recreate_first_client) { - ret = __reserve_doorbell(client); - if (unlikely(ret)) { - DRM_ERROR("Couldn't re-reserve first client db: %d\n", - ret); - return ret; - } - - __update_doorbell_desc(client, client->doorbell_id); - } - - /* Now for every client (and not only execbuf_client) make sure their - * doorbells are known by the GuC - */ - ret = __create_doorbell(guc->execbuf_client); + ret = create_doorbell(guc->execbuf_client); if (ret) return ret; - ret = __create_doorbell(guc->preempt_client); + ret = create_doorbell(guc->preempt_client); if (ret) { - __destroy_doorbell(guc->execbuf_client); + destroy_doorbell(guc->execbuf_client); return ret; } @@ -916,6 +842,19 @@ static int guc_init_doorbell_hw(struct intel_guc *guc) return 0; } +static void guc_clients_doorbell_fini(struct intel_guc *guc) +{ + /* + * By the time we're here, GuC has already been reset. + * Instead of trying (in vain) to communicate with it, let's just + * cleanup the doorbell HW and our internal state. + */ + __destroy_doorbell(guc->preempt_client); + __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID); + __destroy_doorbell(guc->execbuf_client); + __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID); +} + /** * guc_client_alloc() - Allocate an intel_guc_client * @dev_priv: driver private data structure @@ -991,7 +930,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, guc_proc_desc_init(guc, client); guc_stage_desc_init(guc, client); - ret = create_doorbell(client); + ret = reserve_doorbell(client); if (ret) goto err_vaddr; @@ -1015,16 +954,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, static void guc_client_free(struct intel_guc_client *client) { - /* - * XXX: wait for any outstanding submissions before freeing memory. - * Be sure to drop any locks - */ - - /* FIXME: in many cases, by the time we get here the GuC has been - * reset, so we cannot destroy the doorbell properly. Ignore the - * error message for now - */ - destroy_doorbell(client); + unreserve_doorbell(client); guc_stage_desc_fini(client->guc, client); i915_gem_object_unpin_map(client->vma->obj); i915_vma_unpin_and_release(&client->vma); @@ -1366,7 +1296,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) if (err) goto err_free_clients; - err = guc_init_doorbell_hw(guc); + err = guc_clients_doorbell_init(guc); if (err) goto err_free_clients; @@ -1398,6 +1328,7 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ guc_interrupts_release(dev_priv); + guc_clients_doorbell_fini(guc); /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 68d6a69c738f..3f9016466dea 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -85,21 +85,26 @@ static int validate_client(struct intel_guc_client *client, return 0; } +static bool client_doorbell_in_sync(struct intel_guc_client *client) +{ + return doorbell_ok(client->guc, client->doorbell_id); +} + /* - * Check that guc_init_doorbell_hw is doing what it should. + * Check that we're able to synchronize guc_clients with their doorbells * - * During GuC submission enable, we create GuC clients and their doorbells, - * but after resetting the microcontroller (resume & gpu reset), these - * GuC clients are still around, but the status of their doorbells may be - * incorrect. This is the reason behind validating that the doorbells status - * expected by the driver matches what the GuC/HW have. + * We're creating clients and reserving doorbells once, at module load. During + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to + * GuC being reset. In other words - GuC clients are still around, but the + * status of their doorbells may be incorrect. This is the reason behind + * validating that the doorbells status expected by the driver matches what the + * GuC/HW have. */ -static int igt_guc_init_doorbell_hw(void *args) +static int igt_guc_clients(void *args) { struct drm_i915_private *dev_priv = args; struct intel_guc *guc; - DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS); - int i, err = 0; + int err = 0; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); @@ -148,10 +153,21 @@ static int igt_guc_init_doorbell_hw(void *args) goto out; } - /* each client should have received a doorbell during alloc */ + /* each client should now have reserved a doorbell */ if (!has_doorbell(guc->execbuf_client) || !has_doorbell(guc->preempt_client)) { - pr_err("guc_clients_create didn't create doorbells\n"); + pr_err("guc_clients_create didn't reserve doorbells\n"); + err = -EINVAL; + goto out; + } + + /* Now create the doorbells */ + guc_clients_doorbell_init(guc); + + /* each client should now have received a doorbell */ + if (!client_doorbell_in_sync(guc->execbuf_client) || + !client_doorbell_in_sync(guc->preempt_client)) { + pr_err("failed to initialize the doorbells\n"); err = -EINVAL; goto out; } @@ -160,25 +176,26 @@ static int igt_guc_init_doorbell_hw(void *args) * Basic test - an attempt to reallocate a valid doorbell to the * client it is currently assigned should not cause a failure. */ - err = guc_init_doorbell_hw(guc); + err = guc_clients_doorbell_init(guc); if (err) goto out; /* * Negative test - a client with no doorbell (invalid db id). - * Each client gets a doorbell when it is created, after destroying - * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the - * firmware will reject any attempt to allocate a doorbell with an - * invalid id (db has to be reserved before allocation). + * After destroying the doorbell, the db id is changed to + * GUC_DOORBELL_INVALID and the firmware will reject any attempt to + * allocate a doorbell with an invalid id (db has to be reserved before + * allocation). */ destroy_doorbell(guc->execbuf_client); - if (has_doorbell(guc->execbuf_client)) { + if (client_doorbell_in_sync(guc->execbuf_client)) { pr_err("destroy db did not work\n"); err = -EINVAL; goto out; } - err = guc_init_doorbell_hw(guc); + unreserve_doorbell(guc->execbuf_client); + err = guc_clients_doorbell_init(guc); if (err != -EIO) { pr_err("unexpected (err = %d)", err); goto out; @@ -191,43 +208,26 @@ static int igt_guc_init_doorbell_hw(void *args) } /* clean after test */ + err = reserve_doorbell(guc->execbuf_client); + if (err) { + pr_err("failed to reserve back the doorbell back\n"); + } err = create_doorbell(guc->execbuf_client); if (err) { pr_err("recreate doorbell failed\n"); goto out; } - /* - * Negative test - doorbell_bitmap out of sync, will trigger a few of - * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the - * doorbells from our clients don't fail. - */ - bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS); - for (i = 0; i < GUC_NUM_DOORBELLS; i++) - if (i % 2) - test_and_change_bit(i, guc->doorbell_bitmap); - - err = guc_init_doorbell_hw(guc); - if (err) { - pr_err("out of sync doorbell caused an error\n"); - goto out; - } - - /* restore 'correct' db bitmap */ - bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS); - err = guc_init_doorbell_hw(guc); - if (err) { - pr_err("restored doorbell caused an error\n"); - goto out; - } - out: /* * Leave clean state for other test, plus the driver always destroy the * clients during unload. */ + destroy_doorbell(guc->execbuf_client); + destroy_doorbell(guc->preempt_client); guc_clients_destroy(guc); guc_clients_create(guc); + guc_clients_doorbell_init(guc); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); return err; @@ -309,25 +309,7 @@ static int igt_guc_doorbells(void *arg) db_id = clients[i]->doorbell_id; - /* - * Client alloc gives us a doorbell, but we want to exercise - * this ourselves (this resembles guc_init_doorbell_hw) - */ - destroy_doorbell(clients[i]); - if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) { - pr_err("[%d] destroy db did not work!\n", i); - err = -EINVAL; - goto out; - } - - err = __reserve_doorbell(clients[i]); - if (err) { - pr_err("[%d] Failed to reserve a doorbell\n", i); - goto out; - } - - __update_doorbell_desc(clients[i], clients[i]->doorbell_id); - err = __create_doorbell(clients[i]); + err = create_doorbell(clients[i]); if (err) { pr_err("[%d] Failed to create a doorbell\n", i); goto out; @@ -348,8 +330,10 @@ static int igt_guc_doorbells(void *arg) out: for (i = 0; i < ATTEMPTS; i++) - if (!IS_ERR_OR_NULL(clients[i])) + if (!IS_ERR_OR_NULL(clients[i])) { + destroy_doorbell(clients[i]); guc_client_free(clients[i]); + } unlock: mutex_unlock(&dev_priv->drm.struct_mutex); return err; @@ -358,7 +342,7 @@ static int igt_guc_doorbells(void *arg) int intel_guc_live_selftest(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { - SUBTEST(igt_guc_init_doorbell_hw), + SUBTEST(igt_guc_clients), SUBTEST(igt_guc_doorbells), }; From 8ec52ec8dc6827ea7aed524ed5af7aedcfbc553b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:51 +0100 Subject: [PATCH 82/87] drm/i915/guc: Extract clients allocation to submission_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can now move the clients allocation to submission_init path, rather than keeping the condition inside submission_enable called on every reset. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-6-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc_submission.c | 33 +++++++-------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index c74e78b6ba41..488110602e7e 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1149,6 +1149,10 @@ int intel_guc_submission_init(struct intel_guc *guc) goto err_log; GEM_BUG_ON(!guc->ads_vma); + ret = guc_clients_create(guc); + if (ret) + return ret; + for_each_engine(engine, dev_priv, id) { guc->preempt_work[id].engine = engine; INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); @@ -1172,6 +1176,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) for_each_engine(engine, dev_priv, id) cancel_work_sync(&guc->preempt_work[id].work); + guc_clients_destroy(guc); guc_ads_destroy(guc); intel_guc_log_destroy(guc); guc_stage_desc_pool_destroy(guc); @@ -1277,28 +1282,18 @@ int intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - /* - * We're being called on both module initialization and on reset, - * until this flow is changed, we're using regular client presence to - * determine which case are we in, and whether we should allocate new - * clients or just reset their workqueues. - */ - if (!guc->execbuf_client) { - err = guc_clients_create(guc); - if (err) - return err; - } else { - guc_reset_wq(guc->execbuf_client); - guc_reset_wq(guc->preempt_client); - } + GEM_BUG_ON(!guc->execbuf_client); + + guc_reset_wq(guc->execbuf_client); + guc_reset_wq(guc->preempt_client); err = intel_guc_sample_forcewake(guc); if (err) - goto err_free_clients; + return err; err = guc_clients_doorbell_init(guc); if (err) - goto err_free_clients; + return err; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); @@ -1315,10 +1310,6 @@ int intel_guc_submission_enable(struct intel_guc *guc) } return 0; - -err_free_clients: - guc_clients_destroy(guc); - return err; } void intel_guc_submission_disable(struct intel_guc *guc) @@ -1332,8 +1323,6 @@ void intel_guc_submission_disable(struct intel_guc *guc) /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); - - guc_clients_destroy(guc); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) From a00cfc959ccbda94873f49bf4494edc00200d05d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Wed, 13 Dec 2017 23:13:52 +0100 Subject: [PATCH 83/87] drm/i915/guc: Extract doorbell verification into a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have the selftest that's checking doorbell create/destroy, so there's no need to check all doorbells delaying the reset every time. We do want to have that extra sanity check at module load/unload though. Signed-off-by: Michał Winiarski Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213221352.7173-7-michal.winiarski@intel.com --- drivers/gpu/drm/i915/intel_guc_submission.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 488110602e7e..4d2409466a3a 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -820,9 +820,19 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) return false; } -static int guc_clients_doorbell_init(struct intel_guc *guc) +static bool guc_verify_doorbells(struct intel_guc *guc) { u16 db_id; + + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) + if (!doorbell_ok(guc, db_id)) + return false; + + return true; +} + +static int guc_clients_doorbell_init(struct intel_guc *guc) +{ int ret; ret = create_doorbell(guc->execbuf_client); @@ -835,10 +845,6 @@ static int guc_clients_doorbell_init(struct intel_guc *guc) return ret; } - /* Read back & verify all (used & unused) doorbell registers */ - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - WARN_ON(!doorbell_ok(guc, db_id)); - return 0; } @@ -1149,6 +1155,7 @@ int intel_guc_submission_init(struct intel_guc *guc) goto err_log; GEM_BUG_ON(!guc->ads_vma); + WARN_ON(!guc_verify_doorbells(guc)); ret = guc_clients_create(guc); if (ret) return ret; @@ -1177,6 +1184,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) cancel_work_sync(&guc->preempt_work[id].work); guc_clients_destroy(guc); + WARN_ON(!guc_verify_doorbells(guc)); + guc_ads_destroy(guc); intel_guc_log_destroy(guc); guc_stage_desc_pool_destroy(guc); From fe66e92888a1cc0275d98be2d62ba81a097ec3f5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 13 Dec 2017 17:11:54 +0000 Subject: [PATCH 84/87] drm/i915: make CS frequency read support missing more obvious As suggested by Chris, we should make this more obvious for people working with newer generations. Suggested-by: Chris Wilson Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171213171154.6201-1-lionel.g.landwerlin@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 405d70124a46..f478be3ae0ba 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -416,7 +416,7 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) return freq; } - DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n"); + MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); return 0; } From 84ef3a727e32ac32380e28198c3fdc6dcf941ec6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Dec 2017 12:26:13 +0000 Subject: [PATCH 85/87] drm/i915: Show engine state when hangcheck detects a stall Knowing the state of the engine when hangcheck thinks it is stalling is useful for both debugging hangcheck itself and the potential cause of an unwanted stall. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171214122613.26134-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_hangcheck.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 95bbb5a79c4f..0acd9dd3ed5c 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -349,13 +349,18 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, case ENGINE_ACTIVE_HEAD: case ENGINE_ACTIVE_SUBUNITS: - /* Seqno stuck with still active engine gets leeway, + /* + * Seqno stuck with still active engine gets leeway, * in hopes that it is just a long shader. */ timeout = I915_SEQNO_DEAD_TIMEOUT; break; case ENGINE_DEAD: + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer("hangcheck"); + intel_engine_dump(engine, &p, "%s", engine->name); + } break; default: From 8d8c46fad4a15cd8a9811ab74a14de0ee1d6c66b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 14 Dec 2017 15:10:09 +0200 Subject: [PATCH 86/87] drm/i915: properly init lockdep class The code has an ifdef and uses two functions to either init the bare spinlock or init it and set a lock-class. It is possible to do the same thing without an ifdef. With this patch (in debug case) we first use the "default" lock class which is later overwritten to the supplied one. Without lockdep the set name/class function vanishes. Reported-by: kbuild test robot Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171214131009.7479-1-joonas.lahtinen@linux.intel.com Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_timeline.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index c01905d6450c..e9fd87604067 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -33,11 +33,8 @@ static void __intel_timeline_init(struct intel_timeline *tl, { tl->fence_context = context; tl->common = parent; -#ifdef CONFIG_DEBUG_SPINLOCK - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); -#else spin_lock_init(&tl->lock); -#endif + lockdep_set_class_and_name(&tl->lock, lockclass, lockname); init_request_active(&tl->last_request, NULL); INIT_LIST_HEAD(&tl->requests); i915_syncmap_init(&tl->sync); From ee5b5bf351ec8cd8f11c631cb76b30f602e866ee Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 14 Dec 2017 12:10:02 -0800 Subject: [PATCH 87/87] drm/i915: Update DRIVER_DATE to 20171214 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7762eabd31c1..1aba5657f5f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -80,8 +80,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171201" -#define DRIVER_TIMESTAMP 1512176839 +#define DRIVER_DATE "20171214" +#define DRIVER_TIMESTAMP 1513282202 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions