diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 325618d969fe..ca3d1925beda 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -285,8 +285,8 @@ static int alloc_resource(struct intel_vgpu *vgpu,
 	return 0;
 
 no_enough_resource:
-	gvt_vgpu_err("fail to allocate resource %s\n", item);
-	gvt_vgpu_err("request %luMB avail %luMB max %luMB taken %luMB\n",
+	gvt_err("fail to allocate resource %s\n", item);
+	gvt_err("request %luMB avail %luMB max %luMB taken %luMB\n",
 		BYTES_TO_MB(request), BYTES_TO_MB(avail),
 		BYTES_TO_MB(max), BYTES_TO_MB(taken));
 	return -ENOSPC;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 713848c36349..72b97ce525e8 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1382,13 +1382,13 @@ static inline int cmd_address_audit(struct parser_exec_state *s,
 			ret = -EINVAL;
 			goto err;
 		}
-	} else if ((!vgpu_gmadr_is_valid(s->vgpu, guest_gma)) ||
-			(!vgpu_gmadr_is_valid(s->vgpu,
-				guest_gma + op_size - 1))) {
+	} else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) {
 		ret = -EINVAL;
 		goto err;
 	}
 
+	return 0;
+
 err:
 	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
 			s->info->name, guest_gma, op_size);
@@ -2647,7 +2647,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
-int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
+int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload)
 {
 	int ret;
 	struct intel_vgpu *vgpu = workload->vgpu;
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h
index bed33514103c..286703643002 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.h
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h
@@ -42,7 +42,7 @@ void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt);
 
 int intel_gvt_init_cmd_parser(struct intel_gvt *gvt);
 
-int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload);
 
 int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx);
 
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 700050556242..df11f69edc05 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -605,6 +605,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	struct list_head *q = workload_q_head(vgpu, ring_id);
 	struct intel_vgpu_workload *last_workload = get_last_workload(q);
 	struct intel_vgpu_workload *workload = NULL;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	u64 ring_context_gpa;
 	u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
 	int ret;
@@ -668,6 +669,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 	workload->complete = complete_execlist_workload;
 	workload->status = -EINPROGRESS;
 	workload->emulate_schedule_in = emulate_schedule_in;
+	workload->shadowed = false;
 
 	if (ring_id == RCS) {
 		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
@@ -701,6 +703,17 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 		return ret;
 	}
 
+	/* Only scan and shadow the first workload in the queue
+	 * as there is only one pre-allocated buf-obj for shadow.
+	 */
+	if (list_empty(workload_q_head(vgpu, ring_id))) {
+		intel_runtime_pm_get(dev_priv);
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_gvt_scan_and_shadow_workload(workload);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_put(dev_priv);
+	}
+
 	queue_workload(workload);
 	return 0;
 }
@@ -783,6 +796,8 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
 			list_del_init(&pos->list);
 			free_workload(pos);
 		}
+
+		clear_bit(engine->id, vgpu->shadow_ctx_desc_updated);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 6166e34d892b..e6dfc3331f4b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -259,7 +259,7 @@ static void write_pte64(struct drm_i915_private *dev_priv,
 	writeq(pte, addr);
 }
 
-static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
+static inline int gtt_get_entry64(void *pt,
 		struct intel_gvt_gtt_entry *e,
 		unsigned long index, bool hypervisor_access, unsigned long gpa,
 		struct intel_vgpu *vgpu)
@@ -268,22 +268,23 @@ static inline struct intel_gvt_gtt_entry *gtt_get_entry64(void *pt,
 	int ret;
 
 	if (WARN_ON(info->gtt_entry_size != 8))
-		return e;
+		return -EINVAL;
 
 	if (hypervisor_access) {
 		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
 				(index << info->gtt_entry_size_shift),
 				&e->val64, 8);
-		WARN_ON(ret);
+		if (WARN_ON(ret))
+			return ret;
 	} else if (!pt) {
 		e->val64 = read_pte64(vgpu->gvt->dev_priv, index);
 	} else {
 		e->val64 = *((u64 *)pt + index);
 	}
-	return e;
+	return 0;
 }
 
-static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
+static inline int gtt_set_entry64(void *pt,
 		struct intel_gvt_gtt_entry *e,
 		unsigned long index, bool hypervisor_access, unsigned long gpa,
 		struct intel_vgpu *vgpu)
@@ -292,19 +293,20 @@ static inline struct intel_gvt_gtt_entry *gtt_set_entry64(void *pt,
 	int ret;
 
 	if (WARN_ON(info->gtt_entry_size != 8))
-		return e;
+		return -EINVAL;
 
 	if (hypervisor_access) {
 		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
 				(index << info->gtt_entry_size_shift),
 				&e->val64, 8);
-		WARN_ON(ret);
+		if (WARN_ON(ret))
+			return ret;
 	} else if (!pt) {
 		write_pte64(vgpu->gvt->dev_priv, index, e->val64);
 	} else {
 		*((u64 *)pt + index) = e->val64;
 	}
-	return e;
+	return 0;
 }
 
 #define GTT_HAW 46
@@ -445,21 +447,25 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p,
 /*
  * MM helpers.
  */
-struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
+int intel_vgpu_mm_get_entry(struct intel_vgpu_mm *mm,
 		void *page_table, struct intel_gvt_gtt_entry *e,
 		unsigned long index)
 {
 	struct intel_gvt *gvt = mm->vgpu->gvt;
 	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+	int ret;
 
 	e->type = mm->page_table_entry_type;
 
-	ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
+	ret = ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
+	if (ret)
+		return ret;
+
 	ops->test_pse(e);
-	return e;
+	return 0;
 }
 
-struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
+int intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
 		void *page_table, struct intel_gvt_gtt_entry *e,
 		unsigned long index)
 {
@@ -472,7 +478,7 @@ struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(struct intel_vgpu_mm *mm,
 
 /*
  * PPGTT shadow page table helpers.
  */
-static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
+static inline int ppgtt_spt_get_entry(
 		struct intel_vgpu_ppgtt_spt *spt,
 		void *page_table, int type,
 		struct intel_gvt_gtt_entry *e, unsigned long index,
@@ -480,20 +486,24 @@ static inline struct intel_gvt_gtt_entry *ppgtt_spt_get_entry(
 {
 	struct intel_gvt *gvt = spt->vgpu->gvt;
 	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+	int ret;
 
 	e->type = get_entry_type(type);
 
 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
-		return e;
+		return -EINVAL;
 
-	ops->get_entry(page_table, e, index, guest,
+	ret = ops->get_entry(page_table, e, index, guest,
 			spt->guest_page.gfn << GTT_PAGE_SHIFT,
 			spt->vgpu);
+	if (ret)
+		return ret;
+
 	ops->test_pse(e);
-	return e;
+	return 0;
 }
 
-static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
+static inline int ppgtt_spt_set_entry(
 		struct intel_vgpu_ppgtt_spt *spt,
 		void *page_table, int type,
 		struct intel_gvt_gtt_entry *e, unsigned long index,
@@ -503,7 +513,7 @@ static inline struct intel_gvt_gtt_entry *ppgtt_spt_set_entry(
 	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 
 	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
-		return e;
+		return -EINVAL;
 
 	return ops->set_entry(page_table, e, index, guest,
 			spt->guest_page.gfn << GTT_PAGE_SHIFT,
@@ -792,13 +802,13 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page(
 
 #define for_each_present_guest_entry(spt, e, i) \
 	for (i = 0; i < pt_entries(spt); i++) \
-		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
-			ppgtt_get_guest_entry(spt, e, i)))
+		if (!ppgtt_get_guest_entry(spt, e, i) && \
+		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 
 #define for_each_present_shadow_entry(spt, e, i) \
 	for (i = 0; i < pt_entries(spt); i++) \
-		if (spt->vgpu->gvt->gtt.pte_ops->test_present( \
-			ppgtt_get_shadow_entry(spt, e, i)))
+		if (!ppgtt_get_shadow_entry(spt, e, i) && \
+		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
 
 static void ppgtt_get_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 {
@@ -979,29 +989,26 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt)
 }
 
 static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
-		unsigned long index)
+		struct intel_gvt_gtt_entry *se, unsigned long index)
 {
 	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
 	struct intel_vgpu_shadow_page *sp = &spt->shadow_page;
 	struct intel_vgpu *vgpu = spt->vgpu;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
-	struct intel_gvt_gtt_entry e;
 	int ret;
 
-	ppgtt_get_shadow_entry(spt, &e, index);
-
-	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64,
+	trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, se->val64,
 			 index);
 
-	if (!ops->test_present(&e))
+	if (!ops->test_present(se))
 		return 0;
 
-	if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
+	if (ops->get_pfn(se) == vgpu->gtt.scratch_pt[sp->type].page_mfn)
 		return 0;
 
-	if (gtt_type_is_pt(get_next_pt_type(e.type))) {
+	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
 		struct intel_vgpu_ppgtt_spt *s =
-			ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e));
+			ppgtt_find_shadow_page(vgpu, ops->get_pfn(se));
 		if (!s) {
 			gvt_vgpu_err("fail to find guest page\n");
 			ret = -ENXIO;
@@ -1011,12 +1018,10 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt,
 		if (ret)
 			goto fail;
 	}
-	ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn);
-	ppgtt_set_shadow_entry(spt, &e, index);
 	return 0;
 fail:
 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
-			spt, e.val64, e.type);
+			spt, se->val64, se->type);
 	return ret;
 }
 
@@ -1236,22 +1241,37 @@ static int ppgtt_handle_guest_write_page_table(
 {
 	struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt);
 	struct intel_vgpu *vgpu = spt->vgpu;
+	int type = spt->shadow_page.type;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_gvt_gtt_entry se;
 	int ret;
 	int new_present;
 
 	new_present = ops->test_present(we);
 
-	ret = ppgtt_handle_guest_entry_removal(gpt, index);
-	if (ret)
-		goto fail;
+	/*
+	 * Add the new entry first and then remove the old one, so that the
+	 * ppgtt table remains valid during the window between the addition
+	 * and the removal.
+	 */
+	ppgtt_get_shadow_entry(spt, &se, index);
 
 	if (new_present) {
 		ret = ppgtt_handle_guest_entry_add(gpt, we, index);
 		if (ret)
 			goto fail;
 	}
+
+	ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
+	if (ret)
+		goto fail;
+
+	if (!new_present) {
+		ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+		ppgtt_set_shadow_entry(spt, &se, index);
+	}
+
 	return 0;
 fail:
 	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
@@ -1323,7 +1343,7 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 	struct intel_vgpu *vgpu = spt->vgpu;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
-	struct intel_gvt_gtt_entry we;
+	struct intel_gvt_gtt_entry we, se;
 	unsigned long index;
 	int ret;
 
@@ -1339,7 +1359,8 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
 			return ret;
 	} else {
 		if (!test_bit(index, spt->post_shadow_bitmap)) {
-			ret = ppgtt_handle_guest_entry_removal(gpt, index);
+			ppgtt_get_shadow_entry(spt, &se, index);
+			ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
 			if (ret)
 				return ret;
 		}
@@ -1713,8 +1734,10 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 		if (!vgpu_gmadr_is_valid(vgpu, gma))
 			goto err;
 
-		ggtt_get_guest_entry(mm, &e,
-			gma_ops->gma_to_ggtt_pte_index(gma));
+		ret = ggtt_get_guest_entry(mm, &e,
+			gma_ops->gma_to_ggtt_pte_index(gma));
+		if (ret)
+			goto err;
+
 		gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
 			+ (gma & ~GTT_PAGE_MASK);
@@ -1724,7 +1747,9 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 
 	switch (mm->page_table_level) {
 	case 4:
-		ppgtt_get_shadow_root_entry(mm, &e, 0);
+		ret = ppgtt_get_shadow_root_entry(mm, &e, 0);
+		if (ret)
+			goto err;
+
 		gma_index[0] = gma_ops->gma_to_pml4_index(gma);
 		gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
 		gma_index[2] = gma_ops->gma_to_pde_index(gma);
@@ -1732,15 +1757,19 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 		index = 4;
 		break;
 	case 3:
-		ppgtt_get_shadow_root_entry(mm, &e,
+		ret = ppgtt_get_shadow_root_entry(mm, &e,
 				gma_ops->gma_to_l3_pdp_index(gma));
+		if (ret)
+			goto err;
+
 		gma_index[0] = gma_ops->gma_to_pde_index(gma);
 		gma_index[1] = gma_ops->gma_to_pte_index(gma);
 		index = 2;
 		break;
 	case 2:
-		ppgtt_get_shadow_root_entry(mm, &e,
+		ret = ppgtt_get_shadow_root_entry(mm, &e,
 				gma_ops->gma_to_pde_index(gma));
+		if (ret)
+			goto err;
+
 		gma_index[0] = gma_ops->gma_to_pte_index(gma);
 		index = 1;
 		break;
@@ -1755,6 +1784,11 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
 				(i == index - 1));
 		if (ret)
 			goto err;
+
+		if (!pte_ops->test_present(&e)) {
+			gvt_dbg_core("GMA 0x%lx is not present\n", gma);
+			goto err;
+		}
 	}
 
 	gpa = (pte_ops->get_pfn(&e) << GTT_PAGE_SHIFT)
@@ -2329,13 +2363,12 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 
 /**
  * intel_vgpu_reset_gtt - reset the all GTT related status
  * @vgpu: a vGPU
- * @dmlr: true for vGPU Device Model Level Reset, false for GT Reset
  *
  * This function is called from vfio core to reset reset all
  * GTT related status, including GGTT, PPGTT, scratch page.
  *
  */
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr)
+void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
 {
 	int i;
 
@@ -2347,9 +2380,6 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr)
 	 */
 	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
 
-	if (!dmlr)
-		return;
-
 	intel_vgpu_reset_ggtt(vgpu);
 
 	/* clear scratch page for security */
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index f88eb5e89bea..30a4c8d16026 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -49,14 +49,18 @@ struct intel_gvt_gtt_entry {
 };
 
 struct intel_gvt_gtt_pte_ops {
-	struct intel_gvt_gtt_entry *(*get_entry)(void *pt,
-		struct intel_gvt_gtt_entry *e,
-		unsigned long index, bool hypervisor_access, unsigned long gpa,
-		struct intel_vgpu *vgpu);
-	struct intel_gvt_gtt_entry *(*set_entry)(void *pt,
-		struct intel_gvt_gtt_entry *e,
-		unsigned long index, bool hypervisor_access, unsigned long gpa,
-		struct intel_vgpu *vgpu);
+	int (*get_entry)(void *pt,
+			 struct intel_gvt_gtt_entry *e,
+			 unsigned long index,
+			 bool hypervisor_access,
+			 unsigned long gpa,
+			 struct intel_vgpu *vgpu);
+	int (*set_entry)(void *pt,
+			 struct intel_gvt_gtt_entry *e,
+			 unsigned long index,
+			 bool hypervisor_access,
+			 unsigned long gpa,
+			 struct intel_vgpu *vgpu);
 	bool (*test_present)(struct intel_gvt_gtt_entry *e);
 	void (*clear_present)(struct intel_gvt_gtt_entry *e);
 	bool (*test_pse)(struct intel_gvt_gtt_entry *e);
@@ -143,12 +147,12 @@ struct intel_vgpu_mm {
 	struct intel_vgpu *vgpu;
 };
 
-extern struct intel_gvt_gtt_entry *intel_vgpu_mm_get_entry(
+extern int intel_vgpu_mm_get_entry(
 		struct intel_vgpu_mm *mm,
 		void *page_table, struct intel_gvt_gtt_entry *e,
 		unsigned long index);
 
-extern struct intel_gvt_gtt_entry *intel_vgpu_mm_set_entry(
+extern int intel_vgpu_mm_set_entry(
 		struct intel_vgpu_mm *mm,
 		void *page_table, struct intel_gvt_gtt_entry *e,
 		unsigned long index);
 
@@ -208,7 +212,7 @@ extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu);
 
 extern int intel_gvt_init_gtt(struct intel_gvt *gvt);
-extern void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr);
+void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
 extern void intel_gvt_clean_gtt(struct intel_gvt *gvt);
 
 extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 3a74e79eac2f..ea736717e051 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -167,6 +167,7 @@ struct intel_vgpu {
 	atomic_t running_workload_num;
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
+	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
 #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT)
 	struct {
@@ -470,6 +471,8 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa);
 int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
 void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
+int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+
 struct intel_gvt_ops {
 	int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *,
 				unsigned int);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index aeecf315c5db..d85264d48585 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -113,9 +113,17 @@ static int new_mmio_info(struct intel_gvt *gvt,
 		info->offset = i;
 		p = find_mmio_info(gvt, info->offset);
-		if (p)
-			gvt_err("dup mmio definition offset %x\n",
+		if (p) {
+			WARN(1, "dup mmio definition offset %x\n",
 				info->offset);
+			kfree(info);
+
+			/* We return -EEXIST here to make GVT-g load fail,
+			 * so that a duplicated MMIO definition is caught
+			 * as early as possible.
+			 */
+			return -EEXIST;
+		}
 
 		info->ro_mask = ro_mask;
 		info->device = device;
@@ -2583,7 +2591,6 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
 		NULL, force_nonpriv_write);
 
-	MMIO_D(0x22040, D_BDW_PLUS);
 	MMIO_D(0x44484, D_BDW_PLUS);
 	MMIO_D(0x4448c, D_BDW_PLUS);
 
@@ -2641,7 +2648,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS);
 	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL,
 		skl_power_well_ctl_write);
-	MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write);
 
 	MMIO_D(0xa210, D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
@@ -2833,7 +2839,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x320f0, D_SKL | D_KBL);
 
 	MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(0x70034, D_SKL_PLUS);
 	MMIO_D(0x71034, D_SKL_PLUS);
 	MMIO_D(0x72034, D_SKL_PLUS);
@@ -2851,10 +2856,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 		NULL, NULL);
 
 	MMIO_D(0x4ab8, D_KBL);
-	MMIO_D(0x940c, D_SKL_PLUS);
 	MMIO_D(0x2248, D_SKL_PLUS | D_KBL);
-	MMIO_D(0x4ab0, D_SKL | D_KBL);
-	MMIO_D(0x20d4, D_SKL | D_KBL);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index fd0c85f9ef3c..83e88c70272a 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1170,10 +1170,27 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr,
 	return sprintf(buf, "\n");
 }
 
+static ssize_t
+hw_id_show(struct device *dev, struct device_attribute *attr,
+	   char *buf)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+
+	if (mdev) {
+		struct intel_vgpu *vgpu = (struct intel_vgpu *)
+			mdev_get_drvdata(mdev);
+		return sprintf(buf, "%u\n",
+			       vgpu->shadow_ctx->hw_id);
+	}
+	return sprintf(buf, "\n");
+}
+
 static DEVICE_ATTR_RO(vgpu_id);
+static DEVICE_ATTR_RO(hw_id);
 
 static struct attribute *intel_vgpu_attrs[] = {
 	&dev_attr_vgpu_id.attr,
+	&dev_attr_hw_id.attr,
 	NULL
 };
 
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 504e57c3bc23..2ea542257f03 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -207,18 +207,16 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
-		gen9_render_mocs[ring_id][i] = I915_READ(offset);
+		gen9_render_mocs[ring_id][i] = I915_READ_FW(offset);
 		I915_WRITE(offset, vgpu_vreg(vgpu, offset));
-		POSTING_READ(offset);
 		offset.reg += 4;
 	}
 
 	if (ring_id == RCS) {
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < 32; i++) {
-			gen9_render_mocs_L3[i] = I915_READ(l3_offset);
-			I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
-			POSTING_READ(l3_offset);
+			gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset);
+			I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset));
 			l3_offset.reg += 4;
 		}
 	}
@@ -242,18 +240,16 @@
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
-		vgpu_vreg(vgpu, offset) = I915_READ(offset);
-		I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
-		POSTING_READ(offset);
+		vgpu_vreg(vgpu, offset) = I915_READ_FW(offset);
+		I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]);
 		offset.reg += 4;
 	}
 
 	if (ring_id == RCS) {
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < 32; i++) {
-			vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
-			I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
-			POSTING_READ(l3_offset);
+			vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset);
+			I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]);
 			l3_offset.reg += 4;
 		}
 	}
@@ -272,6 +268,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 	u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL];
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+	i915_reg_t last_reg = _MMIO(0);
 
 	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
 		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
@@ -287,7 +284,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 		if (mmio->ring_id != ring_id)
 			continue;
 
-		mmio->value = I915_READ(mmio->reg);
+		mmio->value = I915_READ_FW(mmio->reg);
 
 		/*
 		 * if it is an inhibit context, load in_context mmio
@@ -304,13 +301,18 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
 		else
 			v = vgpu_vreg(vgpu, mmio->reg);
 
-		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		I915_WRITE_FW(mmio->reg, v);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "load",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the switched MMIOs have taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		I915_READ_FW(last_reg);
+
 	handle_tlb_pending_event(vgpu, ring_id);
 }
 
@@ -319,6 +321,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct render_mmio *mmio;
+	i915_reg_t last_reg = _MMIO(0);
 	u32 v;
 	int i, array_size;
 
@@ -335,7 +338,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 		if (mmio->ring_id != ring_id)
 			continue;
 
-		vgpu_vreg(vgpu, mmio->reg) = I915_READ(mmio->reg);
+		vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg);
 
 		if (mmio->mask) {
 			vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
@@ -346,13 +349,17 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 		if (mmio->in_context)
 			continue;
 
-		I915_WRITE(mmio->reg, v);
-		POSTING_READ(mmio->reg);
+		I915_WRITE_FW(mmio->reg, v);
+		last_reg = mmio->reg;
 
 		trace_render_mmio(vgpu->id, "restore",
 				  i915_mmio_reg_offset(mmio->reg),
 				  mmio->value, v);
 	}
+
+	/* Make sure the switched MMIOs have taken effect. */
+	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
+		I915_READ_FW(last_reg);
 }
 
 /**
@@ -367,12 +374,23 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
 void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 			   struct intel_vgpu *next, int ring_id)
 {
+	struct drm_i915_private *dev_priv;
+
 	if (WARN_ON(!pre && !next))
 		return;
 
 	gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
 		       pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
 
+	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
+
+	/**
+	 * We are using the raw mmio access wrappers to improve the
+	 * performance of batch mmio read/write, so we need to handle
+	 * forcewake manually.
+	 */
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
 	/**
 	 * TODO: Optimize for vGPU to vGPU switch by merging
 	 * switch_mmio_to_host() and switch_mmio_to_vgpu().
@@ -382,4 +400,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 
 	if (next)
 		switch_mmio_to_vgpu(next, ring_id);
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 0e2e36ad6196..025aba8a72e0 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -184,41 +184,52 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static int dispatch_workload(struct intel_vgpu_workload *workload)
+static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
+		struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = &ctx->engine[engine->id];
+	u64 desc = 0;
+
+	desc = ce->lrc_desc;
+
+	/* Update bits 0-11 of the context descriptor which includes flags
+	 * like GEN8_CTX_* cached in desc_template
+	 */
+	desc &= U64_MAX << 12;
+	desc |= ctx->desc_template & ((1ULL << 12) - 1);
+
+	ce->lrc_desc = desc;
+}
+
+/**
+ * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
+ * shadowing it as well, including the ring buffer, wa_ctx and ctx.
+ * @workload: an abstract entity for each execlist submission.
+ *
+ * This function is called before the workload is submitted to i915, to make
+ * sure the content of the workload is valid.
+ */
+int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct drm_i915_gem_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_ring *ring;
 	int ret;
 
-	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
-		ring_id, workload);
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (workload->shadowed)
+		return 0;
 
 	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
 	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ring = engine->context_pin(engine, shadow_ctx);
-	if (IS_ERR(ring)) {
-		ret = PTR_ERR(ring);
-		gvt_vgpu_err("fail to pin shadow context\n");
-		workload->status = ret;
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-		return ret;
-	}
+	if (!test_and_set_bit(ring_id, vgpu->shadow_ctx_desc_updated))
+		shadow_context_descriptor_update(shadow_ctx,
+					dev_priv->engine[ring_id]);
 
 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 	if (IS_ERR(rq)) {
@@ -231,7 +242,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 
 	workload->req = i915_gem_request_get(rq);
 
-	ret = intel_gvt_scan_and_shadow_workload(workload);
+	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
 		goto out;
 
@@ -246,25 +257,61 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	if (ret)
 		goto out;
 
+	workload->shadowed = true;
+
+out:
+	return ret;
+}
+
+static int dispatch_workload(struct intel_vgpu_workload *workload)
+{
+	int ring_id = workload->ring_id;
+	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
+	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_ring *ring;
+	int ret = 0;
+
+	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
+		ring_id, workload);
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
+	ret = intel_gvt_scan_and_shadow_workload(workload);
+	if (ret)
+		goto out;
+
 	if (workload->prepare) {
 		ret = workload->prepare(workload);
 		if (ret)
 			goto out;
 	}
 
-	gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
-		ring_id, workload->req);
+	/* Pin the shadow context by gvt even though the shadow context will
+	 * be pinned when i915 allocates a request. That is because gvt will
+	 * update the guest context from the shadow context when the workload
+	 * is completed, and at that moment i915 may already have unpinned the
+	 * shadow context, making the shadow_ctx pages invalid. So gvt needs
+	 * to pin it itself. After updating the guest context, gvt can unpin
+	 * the shadow_ctx safely.
+	 */
+	ring = engine->context_pin(engine, shadow_ctx);
+	if (IS_ERR(ring)) {
+		ret = PTR_ERR(ring);
+		gvt_vgpu_err("fail to pin shadow context\n");
+		goto out;
+	}
 
-	ret = 0;
-	workload->dispatched = true;
 out:
 	if (ret)
 		workload->status = ret;
 
-	if (!IS_ERR_OR_NULL(rq))
-		i915_add_request(rq);
-	else
-		engine->context_unpin(engine, shadow_ctx);
+	if (!IS_ERR_OR_NULL(workload->req)) {
+		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
+			ring_id, workload->req);
+		i915_add_request(workload->req);
+		workload->dispatched = true;
+	}
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
@@ -630,5 +677,7 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
 
 	vgpu->shadow_ctx->engine[RCS].initialised = true;
 
+	bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 9b6bf51e9b9b..0d431a968a32 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -82,6 +82,7 @@ struct intel_vgpu_workload {
 	struct drm_i915_gem_request *req;
 	/* if this workload has been dispatched to i915? */
 	bool dispatched;
+	bool shadowed;
 	int status;
 
 	struct intel_vgpu_mm *shadow_mm;
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 90c14e6e3ea0..5896ead8529e 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -43,6 +43,7 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0;
 	vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0;
 	vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
+	vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT;
 	vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
 		vgpu_aperture_gmadr_base(vgpu);
 	vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
@@ -502,11 +503,11 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 
 	/* full GPU reset or device model level reset */
 	if (engine_mask == ALL_ENGINES || dmlr) {
-		intel_vgpu_reset_gtt(vgpu, dmlr);
-
 		/*fence will not be reset during virtual reset */
-		if (dmlr)
+		if (dmlr) {
+			intel_vgpu_reset_gtt(vgpu);
 			intel_vgpu_reset_resource(vgpu);
+		}
 
 		intel_vgpu_reset_mmio(vgpu, dmlr);
 		populate_pvinfo_page(vgpu);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ef5af697732d..5aa214a05c51 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1905,6 +1905,7 @@ struct i915_workarounds {
 
 struct i915_virtual_gpu {
 	bool active;
+	u32 caps;
 };
 
 /* used in computing the new watermarks state */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10aa7762d9a6..ef1881e256f4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -144,9 +144,9 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 	has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
 
 	if (intel_vgpu_active(dev_priv)) {
-		/* emulation is too hard */
+		/* GVT-g has no support for 32bit ppgtt */
 		has_full_ppgtt = false;
-		has_full_48bit_ppgtt = false;
+		has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
 	}
 
 	if (!has_aliasing_ppgtt)
@@ -180,10 +180,15 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 		return 0;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
-		return has_full_48bit_ppgtt ? 3 : 2;
-	else
-		return has_aliasing_ppgtt ? 1 : 0;
+	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) {
+		if (has_full_48bit_ppgtt)
+			return 3;
+
+		if (has_full_ppgtt)
+			return 2;
+	}
+
+	return has_aliasing_ppgtt ? 1 : 0;
 }
 
 static int ppgtt_bind_vma(struct i915_vma *vma,
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index 2cfe96d3e5d1..0679a58cdbae 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,12 +49,18 @@ enum vgt_g2v_type {
 	VGT_G2V_MAX,
 };
 
+/*
+ * VGT capabilities type
+ */
+#define VGT_CAPS_FULL_48BIT_PPGTT	BIT(2)
+
 struct vgt_if {
 	u64 magic;		/* VGT_MAGIC */
 	u16 version_major;
 	u16 version_minor;
 	u32 vgt_id;		/* ID of vGT instance */
-	u32 rsv1[12];		/* pad to offset 0x40 */
+	u32 vgt_caps;		/* VGT capabilities */
+	u32 rsv1[11];		/* pad to offset 0x40 */
 	/*
 	 * Data structure to describe the balooning info of resources.
 	 * Each VM can only have one portion of continuous area for now.
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index cf7a958e4d3c..5fe9f3f39467 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -75,10 +75,17 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 		return;
 	}
 
+	dev_priv->vgpu.caps = __raw_i915_read32(dev_priv, vgtif_reg(vgt_caps));
+
 	dev_priv->vgpu.active = true;
 	DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
 }
 
+bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv)
+{
+	return dev_priv->vgpu.caps & VGT_CAPS_FULL_48BIT_PPGTT;
+}
+
 struct _balloon_info_ {
 	/*
 	 * There are up to 2 regions per mappable/unmappable graphic
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 3c3b2d24e830..b72bd2956b70 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -27,6 +27,9 @@
 #include "i915_pvinfo.h"
 
 void i915_check_vgpu(struct drm_i915_private *dev_priv);
+
+bool intel_vgpu_has_full_48bit_ppgtt(struct drm_i915_private *dev_priv);
+
 int intel_vgt_balloon(struct drm_i915_private *dev_priv);
 void intel_vgt_deballoon(struct drm_i915_private *dev_priv);
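
Note for reviewers (not part of the patch): with the pte_ops get_entry()/set_entry() hooks converted from returning a struct intel_gvt_gtt_entry pointer to returning an int, callers are expected to check and propagate the error instead of using the looked-up entry unconditionally, as intel_vgpu_gma_to_gpa() now does. A minimal C sketch of the intended calling pattern follows; the wrapper name lookup_guest_entry() is hypothetical and only mirrors the signatures introduced above.

/* Hypothetical caller -- illustrative sketch only, not code from this series. */
static int lookup_guest_entry(struct intel_vgpu_mm *mm, void *page_table,
			      struct intel_gvt_gtt_entry *e,
			      unsigned long index)
{
	int ret;

	/* intel_vgpu_mm_get_entry() now returns 0 on success or a negative
	 * errno (e.g. -EINVAL, or the error from a failed hypervisor read)
	 * instead of the entry pointer.
	 */
	ret = intel_vgpu_mm_get_entry(mm, page_table, e, index);
	if (ret)
		return ret;

	/* Only use e->val64 / e->type after a successful lookup. */
	return 0;
}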