drm/i915/bdw: Support BDW caching
BDW caching works differently than the previous generations. Instead of having bits in the PTE which directly control how the page is cached, the three PTE bits PWT, PCD and PAT provide an index into a PAT defined by register 0x40e0. This style of caching is functionally equivalent to how it works on HSW and before.

v2: Tiny bikeshed as discussed on internal IRC.
v3: Squash in patch from Ville to mirror the x86 PAT setup more like in arch/x86/mm/pat.c. Primarily, the 0th index will be WB, and not uncached.
v4: Add a comment giving the reason for not using a 64b write on the PPAT.
v5: Add a FIXME comment that the caching bits in the PAT registers might be wrong due to doc confusion.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
parent
94ec8f6130
commit
fbe5d36e77
|
@ -58,12 +58,21 @@ typedef uint64_t gen8_gtt_pte_t;
|
||||||
#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
|
#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
|
||||||
#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
|
#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
|
||||||
|
|
||||||
|
/*
 * PTE bit patterns used by gen8_pte_encode() to pick a caching mode. On BDW
 * the PWT/PCD/PAT bits of a PTE do not describe the caching directly; they
 * form an index into the private PAT programmed by gen8_setup_private_ppat().
 *
 * NOTE(review): the mapping from each bit pattern to a PAT entry number
 * presumably follows the x86 convention (PWT = bit 0, PCD = bit 1,
 * PAT = bit 2 of the index) -- confirm against the hardware documentation.
 */
#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) /* uncached */
|
||||||
|
#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
|
||||||
|
#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
|
||||||
|
#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
|
||||||
|
|
||||||
/*
 * Build a gen8 GTT page table entry for @addr.
 *
 * @addr:  DMA address of the backing page, OR'ed into the entry as-is.
 * @level: requested cache level; I915_CACHE_NONE selects the uncached PPAT
 *         index, every other level selects the cached one.
 * @valid: when true the entry is marked present and writable; when false
 *         neither flag is set.
 *
 * Returns the encoded PTE value.
 */
static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t entry = addr;

	if (valid)
		entry |= _PAGE_PRESENT | _PAGE_RW;

	/* The caching mode is chosen indirectly, via an index into the PPAT. */
	entry |= (level == I915_CACHE_NONE) ? PPAT_UNCACHED_INDEX
					     : PPAT_CACHED_INDEX;

	return entry;
}
|
||||||
|
|
||||||
|
@ -806,6 +815,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
|
||||||
*end -= 4096;
|
*end -= 4096;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i915_gem_setup_global_gtt(struct drm_device *dev,
|
void i915_gem_setup_global_gtt(struct drm_device *dev,
|
||||||
unsigned long start,
|
unsigned long start,
|
||||||
unsigned long mappable_end,
|
unsigned long mappable_end,
|
||||||
|
@ -1003,6 +1013,39 @@ static int ggtt_probe_common(struct drm_device *dev,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
|
||||||
|
* bits. When using advanced contexts each context stores its own PAT, but
|
||||||
|
* writing this data shouldn't be harmful even in those cases. */
|
||||||
|
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
|
||||||
|
{
|
||||||
|
#define GEN8_PPAT_UC (0<<0)
|
||||||
|
#define GEN8_PPAT_WC (1<<0)
|
||||||
|
#define GEN8_PPAT_WT (2<<0)
|
||||||
|
#define GEN8_PPAT_WB (3<<0)
|
||||||
|
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
|
||||||
|
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
|
||||||
|
#define GEN8_PPAT_LLC (1<<2)
|
||||||
|
#define GEN8_PPAT_LLCELLC (2<<2)
|
||||||
|
#define GEN8_PPAT_LLCeLLC (3<<2)
|
||||||
|
#define GEN8_PPAT_AGE(x) (x<<4)
|
||||||
|
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
|
||||||
|
uint64_t pat;
|
||||||
|
|
||||||
|
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
|
||||||
|
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
|
||||||
|
GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
|
||||||
|
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
|
||||||
|
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
|
||||||
|
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
|
||||||
|
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
|
||||||
|
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
|
||||||
|
|
||||||
|
/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
|
||||||
|
* write would work. */
|
||||||
|
I915_WRITE(GEN8_PRIVATE_PAT, pat);
|
||||||
|
I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
|
||||||
|
}
|
||||||
|
|
||||||
static int gen8_gmch_probe(struct drm_device *dev,
|
static int gen8_gmch_probe(struct drm_device *dev,
|
||||||
size_t *gtt_total,
|
size_t *gtt_total,
|
||||||
size_t *stolen,
|
size_t *stolen,
|
||||||
|
@ -1028,6 +1071,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
|
||||||
gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
||||||
*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
|
*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
|
||||||
|
|
||||||
|
gen8_setup_private_ppat(dev_priv);
|
||||||
|
|
||||||
ret = ggtt_probe_common(dev, gtt_size);
|
ret = ggtt_probe_common(dev, gtt_size);
|
||||||
|
|
||||||
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
|
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
|
||||||
|
|
|
@ -665,6 +665,7 @@
|
||||||
#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
|
#define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
|
||||||
#define RING_FAULT_VALID (1<<0)
|
#define RING_FAULT_VALID (1<<0)
|
||||||
#define DONE_REG 0x40b0
|
#define DONE_REG 0x40b0
|
||||||
|
#define GEN8_PRIVATE_PAT 0x40e0
|
||||||
#define BSD_HWS_PGA_GEN7 (0x04180)
|
#define BSD_HWS_PGA_GEN7 (0x04180)
|
||||||
#define BLT_HWS_PGA_GEN7 (0x04280)
|
#define BLT_HWS_PGA_GEN7 (0x04280)
|
||||||
#define VEBOX_HWS_PGA_GEN7 (0x04380)
|
#define VEBOX_HWS_PGA_GEN7 (0x04380)
|
||||||
|
|
Loading…
Reference in New Issue