Commit c0888e9e authored by Ville Syrjälä's avatar Ville Syrjälä Committed by Chris Wilson

drm/i915: Enable eLLC caching of display buffers for SKL+

Since SKL the eLLC has been sitting on the far side of the system
agent, meaning the display engine can utilize it. Let's enable that.

I chose WB for the caching mode, because my numbers are indicating
that WT might actually be WB and WC might actually be UC. I'm not
100% sure that is indeed the case but at least my simple rendercopy
based benchmark didn't see any difference in performance.

Also if I configure things to do LLCeLLC+WT I still get cache dirt
on my screen, suggesting that is in fact operating in WB mode
anyway. This is also the reason I had to fix the MOCS target cache
to really say PTE rather than LLC+eLLC.
Since SKL the eLLC has been sitting on the far side of the system agent,
meaning the display engine can utilize it. Let's enable that.

Eero's earlier benchmarks numbers:
"* Results in GfxBench and Unigine (Valley/Heaven) tests were within daily
   variation on the tested SKL machines

 * SKL GT4e (128MB eLLC) / Wayland / Weston:
   +15-20% SynMark TexMem512 (512MB of textures)
   +4-6% SynMark TerrainFly*, CSCloth, ShMapVsm
   -5-10% SynMark TexMem128 (128MB of textures)

 * SKL GT3e (64MB eLLC) / Xorg / Unity:
   +4-8% GpuTest Triangle fullscreen (FullHD)
   -5-10% GpuTest Triangle windowed (1/2 screen)

 * SKL GT2 (no eLLC) / Xorg / Unity:
   * Some of the higher FPS SynMark pixel and vertex shader tests
     are few percent higher, more than daily variance
   => Do you see any reason why this machine would be impacted
      although it doesn't eLLC?"

Caveats:
- Still haven't tested with a prime setup
- Still not entirely sure this a good idea, but I've been
  using it on my cfl anyway :)

v2: Split the MOCS PTE change out

Cc: Eero Tamminen <eero.t.tamminen@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20201007120329.17076-3-ville.syrjala@linux.intel.com
Link: https://patchwork.freedesktop.org/patch/msgid/20201015122138.30161-3-chris@chris-wilson.co.uk
parent 36b6b681
...@@ -324,7 +324,7 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore) ...@@ -324,7 +324,7 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
intel_uncore_write(uncore, intel_uncore_write(uncore,
GEN10_PAT_INDEX(2), GEN10_PAT_INDEX(2),
GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
intel_uncore_write(uncore, intel_uncore_write(uncore,
GEN10_PAT_INDEX(3), GEN10_PAT_INDEX(3),
GEN8_PPAT_UC); GEN8_PPAT_UC);
...@@ -349,17 +349,23 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore) ...@@ -349,17 +349,23 @@ static void cnl_setup_private_ppat(struct intel_uncore *uncore)
*/ */
static void bdw_setup_private_ppat(struct intel_uncore *uncore) static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{ {
struct drm_i915_private *i915 = uncore->i915;
u64 pat; u64 pat;
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
/* for scanout with eLLC */
if (INTEL_GEN(i915) >= 9)
pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
else
pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
} }
......
...@@ -1620,8 +1620,7 @@ extern const struct i915_rev_steppings kbl_revids[]; ...@@ -1620,8 +1620,7 @@ extern const struct i915_rev_steppings kbl_revids[];
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) #define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ #define HAS_WT(dev_priv) HAS_EDRAM(dev_priv)
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
#define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical) #define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment