Commit 05b429a8 authored by Rodrigo Vivi

Merge tag 'gvt-fixes-2018-03-15' of https://github.com/intel/gvt-linux into drm-intel-fixes

gvt-fixes-2018-03-15

- Two warning fixes, for runtime PM and user copy (Xiong, Zhenyu)
- OA context fix for vGPU profiling (Min)
- Privileged batch buffer relocation fix (Fred)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180315100023.5n5a74afky6qinoh@zhen-hp.sh.intel.com
parents f1430f14 850555d1
@@ -471,6 +471,7 @@ struct parser_exec_state {
 	 * used when ret from 2nd level batch buffer
 	 */
 	int saved_buf_addr_type;
+	bool is_ctx_wa;
 	struct cmd_info *info;
@@ -1715,6 +1716,11 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	bb->accessing = true;
 	bb->bb_start_cmd_va = s->ip_va;
+	if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+		bb->bb_offset = s->ip_va - s->rb_va;
+	else
+		bb->bb_offset = 0;
 	/*
 	 * ip_va saves the virtual address of the shadow batch buffer, while
 	 * ip_gma saves the graphics address of the original batch buffer.
@@ -2571,6 +2577,7 @@ static int scan_workload(struct intel_vgpu_workload *workload)
 	s.ring_tail = gma_tail;
 	s.rb_va = workload->shadow_ring_buffer_va;
 	s.workload = workload;
+	s.is_ctx_wa = false;
 	if ((bypass_scan_mask & (1 << workload->ring_id)) ||
 			gma_head == gma_tail)
@@ -2624,6 +2631,7 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	s.ring_tail = gma_tail;
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
 	s.workload = workload;
+	s.is_ctx_wa = true;
 	if (!intel_gvt_ggtt_validate_range(s.vgpu, s.ring_start, s.ring_size)) {
 		ret = -EINVAL;
......
@@ -394,9 +394,11 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * performace for batch mmio read/write, so we need
 	 * handle forcewake mannually.
 	 */
+	intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	switch_mmio(pre, next, ring_id);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_runtime_pm_put(dev_priv);
 }
 /**
......
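The intel_gvt_switch_mmio() hunk above fixes the runtime PM warning: switch_mmio() performs raw MMIO accesses, so the device must hold a runtime PM reference (in addition to forcewake) for the whole register switch. The standalone C sketch below is not part of the patch; it only models that get/put bracketing with made-up stand-ins (pm_refcount, mmio_write, switch_mmio_model) to show why an access without a held reference warns.

#include <stdio.h>

/*
 * Toy model of the intel_runtime_pm_get()/intel_runtime_pm_put()
 * bracketing added around switch_mmio().  All names here are
 * hypothetical stand-ins, not i915 code: a register may only be
 * touched while at least one runtime PM reference is held,
 * otherwise the access warns.
 */
static int pm_refcount;

static void runtime_pm_get(void) { pm_refcount++; }
static void runtime_pm_put(void) { pm_refcount--; }

static void mmio_write(unsigned int reg, unsigned int val)
{
	if (pm_refcount == 0)
		fprintf(stderr, "WARN: MMIO 0x%x written while device suspended\n", reg);
	else
		printf("MMIO 0x%x <- 0x%x\n", reg, val);
}

static void switch_mmio_model(void)
{
	mmio_write(0x2244, 0x1);	/* arbitrary register/value for the demo */
}

int main(void)
{
	switch_mmio_model();		/* no reference held: warns */

	runtime_pm_get();		/* the fix: take a reference first ... */
	switch_mmio_model();		/* ... so the write happens while awake */
	runtime_pm_put();
	return 0;
}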
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
 		pdp_pair[i].val = pdp[7 - i];
 }
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+		u32 *reg_state, bool save)
+{
+	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+	int i = 0;
+	u32 flex_mmio[] = {
+		i915_mmio_reg_offset(EU_PERF_CNTL0),
+		i915_mmio_reg_offset(EU_PERF_CNTL1),
+		i915_mmio_reg_offset(EU_PERF_CNTL2),
+		i915_mmio_reg_offset(EU_PERF_CNTL3),
+		i915_mmio_reg_offset(EU_PERF_CNTL4),
+		i915_mmio_reg_offset(EU_PERF_CNTL5),
+		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	};
+	if (!workload || !reg_state || workload->ring_id != RCS)
+		return;
+	if (save) {
+		workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+			workload->flex_mmio[i] = reg_state[state_offset + 1];
+		}
+	} else {
+		reg_state[ctx_oactxctrl] =
+			i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+		reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+			u32 mmio = flex_mmio[i];
+			reg_state[state_offset] = mmio;
+			reg_state[state_offset + 1] = workload->flex_mmio[i];
+		}
+	}
+}
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
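sr_oa_regs() above relies on the layout of the logical ring context image, where each saved register occupies a pair of dwords: the even slot holds the MMIO offset and the odd slot (+ 1) holds the value, which is why the flex-EU entries are strided by i * 2. Before the guest image is copied over the shadow context, the host-programmed OA values are saved, and they are written back afterwards so guest OA settings never reach the shadow context. The standalone C sketch below only models that save/copy/restore sequence; the indices and values are invented for the demo and are not the real context image.

#include <stdio.h>
#include <string.h>

#define NUM_FLEX 7

/*
 * Standalone illustration (not i915 code) of the save/restore that
 * sr_oa_regs() performs around the guest-context copy: registers live
 * as (mmio offset, value) dword pairs, so a register's value sits at
 * reg_state[pair_index + 1] and consecutive flex-EU pairs are 2 apart.
 */
int main(void)
{
	unsigned int reg_state[64];
	const unsigned int ctx_oactxctrl = 2;	/* made-up pair index */
	const unsigned int ctx_flexeu0 = 8;	/* made-up pair index */
	unsigned int saved_oactxctrl, saved_flex[NUM_FLEX];
	unsigned int guest_image[64];
	unsigned int i;

	/* Pretend the host already configured OA in the shadow context. */
	memset(reg_state, 0, sizeof(reg_state));
	reg_state[ctx_oactxctrl + 1] = 0xabcd;	/* host OA control value */

	/* Step 1 ("save"): remember the host values before the guest copy. */
	saved_oactxctrl = reg_state[ctx_oactxctrl + 1];
	for (i = 0; i < NUM_FLEX; i++)
		saved_flex[i] = reg_state[ctx_flexeu0 + i * 2 + 1];

	/* Step 2: copy the guest-provided context image over the shadow one. */
	memset(guest_image, 0, sizeof(guest_image));
	memcpy(reg_state, guest_image, sizeof(reg_state));

	/* Step 3 ("restore"): put the host OA values back into the + 1 slots. */
	reg_state[ctx_oactxctrl + 1] = saved_oactxctrl;
	for (i = 0; i < NUM_FLEX; i++)
		reg_state[ctx_flexeu0 + i * 2 + 1] = saved_flex[i];

	printf("host OA control preserved: 0x%x\n", reg_state[ctx_oactxctrl + 1]);
	return 0;
}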
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
 	shadow_ring_context = kmap(page);
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
 		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			sizeof(*shadow_ring_context),
 			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
 	kunmap(page);
 	return 0;
 }
@@ -376,6 +426,17 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			goto err;
 		}
+		/* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+		 * is only updated into ring_scan_buffer, not real ring address
+		 * allocated in later copy_workload_to_ring_buffer. pls be noted
+		 * shadow_ring_buffer_va is now pointed to real ring buffer va
+		 * in copy_workload_to_ring_buffer.
+		 */
+		if (bb->bb_offset)
+			bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+				+ bb->bb_offset;
 		/* relocate shadow batch buffer */
 		bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
 		if (gmadr_bytes == 8)
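The comment in the hunk above explains the relocation fix: for a privileged (non-wa_ctx) batch buffer, the bb_start_cmd_va recorded at scan time points into the ring scan buffer, while the MI_BATCH_BUFFER_START command actually ends up in the real shadow ring buffer filled by copy_workload_to_ring_buffer(). The bb_offset saved by perform_bb_shadow() (ip_va - rb_va) is therefore used to re-derive the command's address relative to shadow_ring_buffer_va before the GGTT offset is patched in. The standalone C sketch below models only that pointer arithmetic; the buffers and values are invented for the demo.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/*
 * Standalone model (not i915 code) of the bb_offset re-basing: the
 * offset of a command inside the scan copy of the ring is also its
 * offset inside the real shadow ring buffer, so the pointer can be
 * rebuilt after the ring contents reach their final location.
 */
int main(void)
{
	uint32_t scan_buffer[16] = {0};	/* stand-in for ring_scan_buffer   */
	uint32_t shadow_ring[16] = {0};	/* stand-in for the real ring copy */

	/* At scan time the parser sees the command at some position. */
	uint32_t *bb_start_cmd_va = &scan_buffer[5];
	long bb_offset = (char *)bb_start_cmd_va - (char *)scan_buffer;	/* ip_va - rb_va */

	/* Later the ring contents are copied into the real ring buffer ... */
	memcpy(shadow_ring, scan_buffer, sizeof(scan_buffer));

	/* ... so the command address must be re-based before patching it. */
	bb_start_cmd_va = (uint32_t *)((char *)shadow_ring + bb_offset);
	bb_start_cmd_va[1] = 0x12345000;	/* pretend GGTT offset of the shadow BB */

	printf("patched dword at ring offset %ld: 0x%x\n",
	       bb_offset + (long)sizeof(uint32_t), (unsigned int)shadow_ring[6]);
	return 0;
}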
@@ -1044,9 +1105,11 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
-	s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
+	s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
 			sizeof(struct intel_vgpu_workload), 0,
 			SLAB_HWCACHE_ALIGN,
+			offsetof(struct intel_vgpu_workload, rb_tail),
+			sizeof_field(struct intel_vgpu_workload, rb_tail),
 			NULL);
 	if (!s->workloads) {
......
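The kmem_cache_create_usercopy() change in the hunk above addresses the hardened-usercopy warning: part of each workload object can be involved in copies to/from user space, and with CONFIG_HARDENED_USERCOPY such copies are only allowed inside a whitelisted (offset, size) window declared when the cache is created. The standalone C sketch below is not kernel code; it models the whitelist check with a made-up struct (demo_workload) whose rb_tail field mirrors the whitelisted field named in the hunk.

#include <stdio.h>
#include <stddef.h>

/*
 * Standalone model (not kernel code) of the hardened-usercopy whitelist
 * that kmem_cache_create_usercopy() sets up: a copy of `len` bytes at
 * `offset` inside a cached object is allowed only if it stays within
 * the window [useroffset, useroffset + usersize) declared for the cache.
 */
struct demo_workload {			/* made-up layout for the demo */
	void *vgpu;
	int ring_id;
	unsigned long rb_head;
	unsigned long rb_tail;		/* the whitelisted field */
	unsigned long rb_start;
};

static int usercopy_allowed(size_t useroffset, size_t usersize,
			    size_t offset, size_t len)
{
	return offset >= useroffset && offset + len <= useroffset + usersize;
}

int main(void)
{
	size_t useroffset = offsetof(struct demo_workload, rb_tail);
	size_t usersize = sizeof(((struct demo_workload *)0)->rb_tail);

	/* Copying rb_tail itself is inside the window ... */
	printf("rb_tail copy allowed: %d\n",
	       usercopy_allowed(useroffset, usersize, useroffset, usersize));

	/* ... copying the whole object is not, and would trigger the warning. */
	printf("whole-object copy allowed: %d\n",
	       usercopy_allowed(useroffset, usersize, 0,
				sizeof(struct demo_workload)));
	return 0;
}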
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
 	/* shadow batch buffer */
 	struct list_head shadow_bb;
 	struct intel_shadow_wa_ctx wa_ctx;
+	/* oa registers */
+	u32 oactxctrl;
+	u32 flex_mmio[7];
 };
 struct intel_vgpu_shadow_bb {
@@ -120,6 +124,7 @@ struct intel_vgpu_shadow_bb {
 	u32 *bb_start_cmd_va;
 	unsigned int clflush;
 	bool accessing;
+	unsigned long bb_offset;
 };
 #define workload_q_head(vgpu, ring_id) \
......