Commit 6f167a36 authored by Dave Airlie

Merge tag 'drm-intel-gt-next-2024-02-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Add GuC submission interface version query (Tvrtko Ursulin)

Driver Changes:

Fixes/improvements/new stuff:

- Atomically invalidate userptr on mmu-notifier (Jonathan Cavitt)
- Update handling of MMIO triggered reports (Umesh Nerlige Ramappa)
- Don't make assumptions about intel_wakeref_t type (Jani Nikula)
- Add workaround 14019877138 [xelpg] (Tejas Upadhyay)
- Allow for very slow HuC loading [huc] (John Harrison)
- Flush context destruction worker at suspend [guc] (Alan Previn)
- Close deregister-context race against CT-loss [guc] (Alan Previn)
- Avoid circular locking issue on busyness flush [guc] (John Harrison)
- Use rc6.supported flag from intel_gt for rc6_enable sysfs (Juan Escamilla)
- Reflect the true and current status of rc6_enable (Juan Escamilla)
- Wake GT before sending H2G message [mtl] (Vinay Belgaumkar)
- Restart the heartbeat timer when forcing a pulse (John Harrison)

Future platform enablement:

- Extend driver code of Xe_LPG to Xe_LPG+ [xelpg] (Harish Chegondi)
- Extend some workarounds/tuning to gfx version 12.74 [xelpg] (Matt Roper)

Miscellaneous:

- Reconcile Excess struct member kernel-doc warnings (Randy Dunlap)
- Change wa and EU_PERF_CNTL registers to MCR type [guc] (Shuicheng Lin)
- Add flex arrays to struct i915_syncmap (Erick Archer)
- Increasing the sleep time for live_rc6_manual [selftests] (Anirban Sk)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Zc3iIVsiAwo+bu10@tursulin-desk
parents b13cfb44 eb927f01
@@ -2160,12 +2160,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 #ifdef CONFIG_MMU_NOTIFIER
 	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
-		read_lock(&eb->i915->mm.notifier_lock);
-		/*
-		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
-		 * could not have been set
-		 */
 		for (i = 0; i < count; i++) {
 			struct eb_vma *ev = &eb->vma[i];
 			struct drm_i915_gem_object *obj = ev->vma->obj;
@@ -2177,8 +2171,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 			if (err)
 				break;
 		}
-		read_unlock(&eb->i915->mm.notifier_lock);
 	}
 #endif
...
@@ -28,6 +28,13 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 	intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
+	/*
+	 * On rare occasions, we've observed the fence completion triggers
+	 * free_engines asynchronously via rcu_call. Ensure those are done.
+	 * This path is only called on suspend, so it's an acceptable cost.
+	 */
+	rcu_barrier();
 	flush_workqueue(i915->wq);
 	/*
@@ -160,6 +167,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 	 * machine in an unusable condition.
 	 */
+	/* Like i915_gem_suspend, flush tasks staged from fence triggers */
+	rcu_barrier();
 	for_each_gt(gt, i915, i)
 		intel_gt_suspend_late(gt);
...
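Note (illustration, not part of the diff): the rcu_barrier() calls added above rely on the standard call_rcu()/rcu_barrier() contract, where rcu_barrier() returns only after every previously queued RCU callback has finished running. A minimal kernel-style sketch of that pattern, using hypothetical names (engines_stub, drop_engines, suspend_flush):

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct engines_stub {
		struct rcu_head rcu;	/* embedded callback handle, like i915's free_engines path */
	};

	static void engines_free_rcu(struct rcu_head *rcu)
	{
		/* deferred free, runs after a grace period */
		kfree(container_of(rcu, struct engines_stub, rcu));
	}

	static void drop_engines(struct engines_stub *e)
	{
		/* queue the free and return immediately */
		call_rcu(&e->rcu, engines_free_rcu);
	}

	static void suspend_flush(void)
	{
		/* wait for every call_rcu() callback queued so far to complete */
		rcu_barrier();
	}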
@@ -42,7 +42,6 @@
 #include "i915_drv.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
-#include "i915_gem_userptr.h"
 #include "i915_scatterlist.h"
 #ifdef CONFIG_MMU_NOTIFIER
@@ -61,36 +60,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
 					const struct mmu_notifier_range *range,
 					unsigned long cur_seq)
 {
-	struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	long r;
-	if (!mmu_notifier_range_blockable(range))
-		return false;
-	write_lock(&i915->mm.notifier_lock);
 	mmu_interval_set_seq(mni, cur_seq);
-	write_unlock(&i915->mm.notifier_lock);
-	/*
-	 * We don't wait when the process is exiting. This is valid
-	 * because the object will be cleaned up anyway.
-	 *
-	 * This is also temporarily required as a hack, because we
-	 * cannot currently force non-consistent batch buffers to preempt
-	 * and reschedule by waiting on it, hanging processes on exit.
-	 */
-	if (current->flags & PF_EXITING)
-		return true;
-	/* we will unbind on next submission, still have userptr pins */
-	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
-				  MAX_SCHEDULE_TIMEOUT);
-	if (r <= 0)
-		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
 	return true;
 }
@@ -580,15 +550,3 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 #endif
 }
-int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
-{
-#ifdef CONFIG_MMU_NOTIFIER
-	rwlock_init(&dev_priv->mm.notifier_lock);
-#endif
-	return 0;
-}
-void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
-{
-}
...
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-#ifndef __I915_GEM_USERPTR_H__
-#define __I915_GEM_USERPTR_H__
-struct drm_i915_private;
-int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-#endif /* __I915_GEM_USERPTR_H__ */
...
@@ -226,7 +226,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
 static int mtl_dummy_pipe_control(struct i915_request *rq)
 {
 	/* Wa_14016712196 */
-	if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+	if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
 	    IS_DG2(rq->i915)) {
 		u32 *cs;
@@ -822,7 +822,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 		flags |= PIPE_CONTROL_FLUSH_L3;
 	/* Wa_14016712196 */
-	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
 		/* dummy PIPE_CONTROL + depth flush */
 		cs = gen12_emit_pipe_control(cs, 0,
 					     PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
...
@@ -1190,7 +1190,8 @@ static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
 			num = ARRAY_SIZE(xelpmp_regs);
 		}
 	} else {
-		if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+		if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) ||
+		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
 		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
 		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
 		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
...
@@ -290,6 +290,9 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
 	heartbeat_commit(rq, &attr);
 	GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
+	/* Ensure the forced pulse gets a full period to execute */
+	next_heartbeat(engine);
 	return 0;
 }
...
@@ -176,27 +176,13 @@ static u32 get_residency(struct intel_gt *gt, enum intel_rc6_res_type id)
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
 }
-static u8 get_rc6_mask(struct intel_gt *gt)
-{
-	u8 mask = 0;
-	if (HAS_RC6(gt->i915))
-		mask |= BIT(0);
-	if (HAS_RC6p(gt->i915))
-		mask |= BIT(1);
-	if (HAS_RC6pp(gt->i915))
-		mask |= BIT(2);
-	return mask;
-}
 static ssize_t rc6_enable_show(struct kobject *kobj,
 			       struct kobj_attribute *attr,
 			       char *buff)
 {
 	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
-	return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+	return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
 }
 static ssize_t rc6_enable_dev_show(struct device *dev,
@@ -205,7 +191,7 @@ static ssize_t rc6_enable_dev_show(struct device *dev,
 {
 	struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
-	return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
+	return sysfs_emit(buff, "%x\n", gt->rc6.enabled);
 }
 static u32 __rc6_residency_ms_show(struct intel_gt *gt)
...
@@ -495,7 +495,7 @@ static unsigned int get_mocs_settings(struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
+	if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) {
 		table->size = ARRAY_SIZE(mtl_mocs_table);
 		table->table = mtl_mocs_table;
 		table->n_entries = MTL_NUM_MOCS_ENTRIES;
...
@@ -123,7 +123,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
 	 * temporary wa and should be removed after fixing real cause
 	 * of forcewake timeouts.
 	 */
-	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
 		pg_enable =
 			GEN9_MEDIA_PG_ENABLE |
 			GEN11_MEDIA_SAMPLER_PG_ENABLE;
...
@@ -789,8 +789,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
 	dg2_ctx_gt_tuning_init(engine, wal);
-	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
-	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+	/*
+	 * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
+	 * gen12_emit_indirect_ctx_rcs() rather than here on some early
+	 * steppings.
+	 */
+	if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+	      IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
 		wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
 }
@@ -820,6 +825,9 @@ static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
 	/* Wa_18019271663 */
 	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+	/* Wa_14019877138 */
+	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
 }
 static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
@@ -908,7 +916,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
 	if (engine->class != RENDER_CLASS)
 		goto done;
-	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
 		xelpg_ctx_workarounds_init(engine, wal);
 	else if (IS_PONTEVECCHIO(i915))
 		; /* noop; none at this time */
@@ -1643,7 +1651,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 static void
 xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
-	/* Wa_14018778641 / Wa_18018781329 */
+	/* Wa_14018575942 / Wa_18018781329 */
 	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
 	/* Wa_22016670082 */
@@ -1710,7 +1718,7 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
 {
-	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
 		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
 		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
 	}
@@ -1743,7 +1751,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 		return;
 	}
-	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
 		xelpg_gt_workarounds_init(gt, wal);
 	else if (IS_PONTEVECCHIO(i915))
 		pvc_gt_workarounds_init(gt, wal);
@@ -2216,7 +2224,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 	if (engine->gt->type == GT_MEDIA)
 		; /* none yet */
-	else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+	else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
 		xelpg_whitelist_build(engine);
 	else if (IS_PONTEVECCHIO(i915))
 		pvc_whitelist_build(engine);
@@ -2828,7 +2836,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
 {
 	struct drm_i915_private *i915 = gt->i915;
-	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
 		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
 	/*
@@ -2881,7 +2889,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 	}
 	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
-	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
+	    IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
 		/* Wa_14017856879 */
 		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
...
@@ -62,12 +62,12 @@ int live_rc6_manual(void *arg)
 	dt = ktime_get();
 	rc0_power = librapl_energy_uJ();
-	msleep(250);
+	msleep(1000);
 	rc0_power = librapl_energy_uJ() - rc0_power;
 	dt = ktime_sub(ktime_get(), dt);
 	res[1] = rc6_residency(rc6);
 	if ((res[1] - res[0]) >> 10) {
-		pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+		pr_err("RC6 residency increased by %lldus while disabled for 1000ms!\n",
 		       (res[1] - res[0]) >> 10);
 		err = -EINVAL;
 		goto out_unlock;
...
@@ -377,8 +377,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
 	    CCS_MASK(engine->gt))
 		ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
+	/*
+	 * some of the WA registers are MCR registers. As it is safe to
+	 * use MCR form for non-MCR registers, for code simplicity, all
+	 * WA registers are added with MCR form.
+	 */
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-		ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
+		ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
 	/* Be extra paranoid and include all whitelist registers. */
 	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
@@ -394,13 +399,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
 		ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
 	if (GRAPHICS_VER(engine->i915) >= 12) {
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
-		ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
+		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
 	}
 	return ret ? -1 : 0;
...
@@ -184,7 +184,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
 	 * in the seconds range. However, there is a limit on how long an
 	 * individual wait_for() can wait. So wrap it in a loop.
 	 */
-	before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+	before_freq = intel_rps_read_actual_frequency(&gt->rps);
 	before = ktime_get();
 	for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
 		ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
@@ -192,7 +192,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
 			break;
 		guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n",
-			count, intel_rps_read_actual_frequency(&uncore->gt->rps), status,
+			count, intel_rps_read_actual_frequency(&gt->rps), status,
 			REG_FIELD_GET(GS_BOOTROM_MASK, status),
 			REG_FIELD_GET(GS_UKERNEL_MASK, status));
 	}
@@ -204,7 +204,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
 		u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
 		guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
-			 status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+			 status, delta_ms, intel_rps_read_actual_frequency(&gt->rps), ret);
 		guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
 			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
 			 bootrom, ukernel,
@@ -254,11 +254,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
 		guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
 			 delta_ms, status, count, ret);
 		guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
-			 intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq,
+			 intel_rps_read_actual_frequency(&gt->rps), before_freq,
 			 intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
 	} else {
 		guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
-			delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+			delta_ms, intel_rps_read_actual_frequency(&gt->rps),
 			before_freq, status, count, ret);
 	}
...
@@ -236,6 +236,13 @@ set_context_destroyed(struct intel_context *ce)
 	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
 }
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
 static inline bool context_pending_disable(struct intel_context *ce)
 {
 	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -613,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
 					 u32 g2h_len_dw,
 					 bool loop)
 {
+	int ret;
 	/*
 	 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
 	 * so we don't handle the case where we don't get a reply because we
@@ -623,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
 	if (g2h_len_dw)
 		atomic_inc(&guc->outstanding_submission_g2h);
-	return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+	ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+	if (ret)
+		atomic_dec(&guc->outstanding_submission_g2h);
+	return ret;
 }
 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -1362,7 +1375,45 @@ static void guc_enable_busyness_worker(struct intel_guc *guc)
 static void guc_cancel_busyness_worker(struct intel_guc *guc)
 {
-	cancel_delayed_work_sync(&guc->timestamp.work);
+	/*
+	 * There are many different call stacks that can get here. Some of them
+	 * hold the reset mutex. The busyness worker also attempts to acquire the
+	 * reset mutex. Synchronously flushing a worker thread requires acquiring
+	 * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+	 * flush can deadlock because it holds the worker mutex while waiting for
+	 * the reset mutex, but another thread is holding the reset mutex and might
+	 * attempt to use other worker functions.
+	 *
+	 * In practice, this scenario does not exist because the busyness worker
+	 * does not block waiting for the reset mutex. It does a try-lock on it and
+	 * immediately exits if the lock is already held. Unfortunately, the mutex
+	 * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+	 * annotation but not to the extent of explaining the 'might lock' is also a
+	 * 'does not need to lock'. So one option would be to add more complex lockdep
+	 * annotations to ignore the issue (if at all possible). A simpler option is to
+	 * just not flush synchronously when a reset is in progress. Given that the worker
+	 * will just early exit and re-schedule itself anyway, there is no advantage
+	 * to running it immediately.
+	 *
+	 * If a reset is not in progress, then the synchronous flush may be required.
+	 * As noted many call stacks lead here, some during suspend and driver unload
+	 * which do require a synchronous flush to make sure the worker is stopped
+	 * before memory is freed.
+	 *
+	 * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+	 * every possible call stack is unfeasible. It would be too intrusive to many
+	 * areas that really don't care about the GuC backend. However, there is the
+	 * 'reset_in_progress' flag available, so just use that.
+	 *
+	 * And note that in the case of a reset occurring during driver unload
+	 * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set)
+	 * is fine because there is another cancel in _finish (when the reset flag is
+	 * not).
+	 */
+	if (guc_to_gt(guc)->uc.reset_in_progress)
+		cancel_delayed_work(&guc->timestamp.work);
+	else
+		cancel_delayed_work_sync(&guc->timestamp.work);
 }
 static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1613,6 +1664,11 @@ static void guc_flush_submissions(struct intel_guc *guc)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
+void intel_guc_submission_flush_work(struct intel_guc *guc)
+{
+	flush_work(&guc->submission_state.destroyed_worker);
+}
 static void guc_flush_destroyed_contexts(struct intel_guc *guc);
 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
@@ -1948,8 +2004,16 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
+	/*
+	 * Ensure the busyness worker gets cancelled even on a fatal wedge.
+	 * Note that reset_prepare is not allowed to because it confuses lockdep.
+	 */
+	if (guc_submission_initialized(guc))
+		guc_cancel_busyness_worker(guc);
 	/* Reset called during driver load or during wedge? */
 	if (unlikely(!guc_submission_initialized(guc) ||
+		     !intel_guc_is_fw_running(guc) ||
 		     intel_gt_is_wedged(guc_to_gt(guc)))) {
 		return;
 	}
@@ -3283,12 +3347,13 @@ static void guc_context_close(struct intel_context *ce)
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 }
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_gt *gt = guc_to_gt(guc);
 	unsigned long flags;
 	bool disabled;
+	int ret;
 	GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
 	GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3299,18 +3364,41 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
 	spin_lock_irqsave(&ce->guc_state.lock, flags);
 	disabled = submission_disabled(guc);
 	if (likely(!disabled)) {
+		/*
+		 * Take a gt-pm ref and change context state to be destroyed.
+		 * NOTE: a G2H IRQ that comes after will put this gt-pm ref back
+		 */
 		__intel_gt_pm_get(gt);
 		set_context_destroyed(ce);
 		clr_context_registered(ce);
 	}
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 	if (unlikely(disabled)) {
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
-		return;
+		return 0;
 	}
-	deregister_context(ce, ce->guc_id.id);
+	/*
+	 * GuC is active, lets destroy this context, but at this point we can still be racing
+	 * with suspend, so we undo everything if the H2G fails in deregister_context so
+	 * that GuC reset will find this context during clean up.
+	 */
+	ret = deregister_context(ce, ce->guc_id.id);
+	if (ret) {
+		spin_lock(&ce->guc_state.lock);
+		set_context_registered(ce);
+		clr_context_destroyed(ce);
+		spin_unlock(&ce->guc_state.lock);
+		/*
+		 * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
+		 * the wakeref immediately but per function spec usage call this after unlock.
+		 */
+		intel_wakeref_put_async(&gt->wakeref);
+	}
+	return ret;
 }
 static void __guc_context_destroy(struct intel_context *ce)
@@ -3378,7 +3466,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
 		if (!ce)
 			break;
-		guc_lrc_desc_unpin(ce);
+		if (guc_lrc_desc_unpin(ce)) {
+			/*
+			 * This means GuC's CT link severed mid-way which could happen
+			 * in suspend-resume corner cases. In this case, put the
+			 * context back into the destroyed_contexts list which will
+			 * get picked up on the next context deregistration event or
+			 * purged in a GuC sanitization event (reset/unload/wedged/...).
+			 */
+			spin_lock_irqsave(&guc->submission_state.lock, flags);
+			list_add_tail(&ce->destroyed_link,
+				      &guc->submission_state.destroyed_contexts);
+			spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+			/* Bail now since the list might never be emptied if h2gs fail */
+			break;
+		}
 	}
 }
@@ -3389,6 +3492,17 @@ static void destroyed_worker_func(struct work_struct *w)
 	struct intel_gt *gt = guc_to_gt(guc);
 	intel_wakeref_t wakeref;
+	/*
+	 * In rare cases we can get here via async context-free fence-signals that
+	 * come very late in suspend flow or very early in resume flows. In these
+	 * cases, GuC won't be ready but just skipping it here is fine as these
+	 * pending-destroy-contexts get destroyed totally at GuC reset time at the
+	 * end of suspend.. OR.. this worker can be picked up later on the next
+	 * context destruction trigger after resume-completes
+	 */
+	if (!intel_guc_is_ready(guc))
+		return;
 	with_intel_gt_pm(gt, wakeref)
 		deregister_destroyed_contexts(guc);
 }
...
@@ -38,6 +38,8 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 				   bool interruptible,
 				   long timeout);
+void intel_guc_submission_flush_work(struct intel_guc *guc);
 static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
 {
 	return guc->submission_supported;
...
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
 #include "intel_guc_reg.h"
 #include "intel_huc.h"
 #include "intel_huc_print.h"
@@ -447,17 +448,68 @@ static const char *auth_mode_string(struct intel_huc *huc,
 	return partial ? "clear media" : "all workloads";
 }
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that users don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define HUC_LOAD_RETRY_LIMIT	20
+#else
+#define HUC_LOAD_RETRY_LIMIT	3
+#endif
 int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
 				     enum intel_huc_authentication_type type)
 {
 	struct intel_gt *gt = huc_to_gt(huc);
-	int ret;
+	struct intel_uncore *uncore = gt->uncore;
+	ktime_t before, after, delta;
+	int ret, count;
+	u64 delta_ms;
+	u32 before_freq;
-	ret = __intel_wait_for_register(gt->uncore,
-					huc->status[type].reg,
-					huc->status[type].mask,
-					huc->status[type].value,
-					2, 50, NULL);
+	/*
+	 * The KMD requests maximum frequency during driver load, however thermal
+	 * throttling can force the frequency down to minimum (although the board
+	 * really should never get that hot in real life!). IFWI issues have been
+	 * seen to cause sporadic failures to grant the higher frequency. And at
+	 * minimum frequency, the authentication time can be in the seconds range.
+	 * Note that there is a limit on how long an individual wait_for() can wait.
+	 * So wrap it in a loop.
+	 */
+	before_freq = intel_rps_read_actual_frequency(&gt->rps);
+	before = ktime_get();
+	for (count = 0; count < HUC_LOAD_RETRY_LIMIT; count++) {
+		ret = __intel_wait_for_register(gt->uncore,
+						huc->status[type].reg,
+						huc->status[type].mask,
+						huc->status[type].value,
+						2, 1000, NULL);
+		if (!ret)
+			break;
+		huc_dbg(huc, "auth still in progress, count = %d, freq = %dMHz, status = 0x%08X\n",
+			count, intel_rps_read_actual_frequency(&gt->rps),
+			huc->status[type].reg.reg);
+	}
+	after = ktime_get();
+	delta = ktime_sub(after, before);
+	delta_ms = ktime_to_ms(delta);
+	if (delta_ms > 50) {
+		huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+			 delta_ms, huc->status[type].reg.reg, count, ret);
+		huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+			 intel_rps_read_actual_frequency(&gt->rps), before_freq,
+			 intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
+	} else {
+		huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+			delta_ms, intel_rps_read_actual_frequency(&gt->rps),
+			before_freq, huc->status[type].reg.reg, count, ret);
+	}
 	/* mark the load process as complete even if the wait failed */
 	delayed_huc_load_complete(huc);
...
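Aside (illustration, not from the patch): the HuC change above applies the same bounded-wait-in-a-loop pattern the GuC loader already uses: each iteration waits at most one slice so the helper's per-call limit is respected, while the total elapsed time is tracked and flagged if it exceeds the expected bound. A self-contained userspace sketch of that shape, with a hypothetical device_ready() standing in for the register poll:

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>
	#include <unistd.h>

	#define RETRY_LIMIT 3      /* like HUC_LOAD_RETRY_LIMIT in release builds */
	#define SLICE_MS    1000   /* upper bound for a single wait */

	/* hypothetical stand-in for polling the readiness register */
	static bool device_ready(void)
	{
		return true;
	}

	static long long now_ms(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
	}

	int main(void)
	{
		long long before = now_ms(), delta;
		bool done = false;
		int count;

		for (count = 0; count < RETRY_LIMIT && !done; count++) {
			/* real code: __intel_wait_for_register(..., 2, SLICE_MS, NULL) */
			done = device_ready();
			if (!done) {
				fprintf(stderr, "still waiting, count = %d\n", count);
				usleep(SLICE_MS * 1000);
			}
		}

		delta = now_ms() - before;
		if (delta > 50)
			fprintf(stderr, "excessive wait: %lldms (count = %d)\n", delta, count);
		else
			printf("ready after %lldms (count = %d)\n", delta, count);
		return done ? 0 : 1;
	}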
@@ -640,7 +640,7 @@ void intel_uc_reset_finish(struct intel_uc *uc)
 	uc->reset_in_progress = false;
 	/* Firmware expected to be running when this function is called */
-	if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
+	if (intel_uc_uses_guc_submission(uc))
 		intel_guc_submission_reset_finish(guc);
 }
@@ -690,6 +690,8 @@ void intel_uc_suspend(struct intel_uc *uc)
 		return;
 	}
+	intel_guc_submission_flush_work(guc);
 	with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
 		err = intel_guc_suspend(guc);
 		if (err)
...
@@ -147,7 +147,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = obj_to_i915(obj);
-	if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
+	if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 74))) {
 		switch (obj->pat_index) {
 		case 0: return " WB";
 		case 1: return " WT";
...
@@ -165,14 +165,6 @@ struct i915_gem_mm {
 	struct notifier_block vmap_notifier;
 	struct shrinker *shrinker;
-#ifdef CONFIG_MMU_NOTIFIER
-	/**
-	 * notifier_lock for mmu notifiers, memory may not be allocated
-	 * while holding this lock.
-	 */
-	rwlock_t notifier_lock;
-#endif
 	/* shrinker accounting, also useful for userland debugging */
 	u64 shrink_memory;
 	u32 shrink_count;
...
@@ -48,7 +48,6 @@
 #include "gem/i915_gem_object_frontbuffer.h"
 #include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
-#include "gem/i915_gem_userptr.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
@@ -1165,10 +1164,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
 		RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K;
-	ret = i915_gem_init_userptr(dev_priv);
-	if (ret)
-		return ret;
 	for_each_gt(gt, dev_priv, i) {
 		intel_uc_fetch_firmwares(&gt->uc);
 		intel_wopcm_init(&gt->wopcm);
...
@@ -3225,7 +3225,7 @@ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
 	struct intel_gt *gt = to_gt(i915);
 	/* Wa_18013179988 */
-	if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+	if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
 		intel_wakeref_t wakeref;
 		u32 reg, shift;
...
@@ -551,6 +551,38 @@ static int query_hwconfig_blob(struct drm_i915_private *i915,
 	return hwconfig->size;
 }
+static int
+query_guc_submission_version(struct drm_i915_private *i915,
+			     struct drm_i915_query_item *query)
+{
+	struct drm_i915_query_guc_submission_version __user *query_ptr =
+					    u64_to_user_ptr(query->data_ptr);
+	struct drm_i915_query_guc_submission_version ver;
+	struct intel_guc *guc = &to_gt(i915)->uc.guc;
+	const size_t size = sizeof(ver);
+	int ret;
+	if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+		return -ENODEV;
+	ret = copy_query_item(&ver, size, size, query);
+	if (ret != 0)
+		return ret;
+	if (ver.branch || ver.major || ver.minor || ver.patch)
+		return -EINVAL;
+	ver.branch = 0;
+	ver.major = guc->submission_version.major;
+	ver.minor = guc->submission_version.minor;
+	ver.patch = guc->submission_version.patch;
+	if (copy_to_user(query_ptr, &ver, size))
+		return -EFAULT;
+	return 0;
+}
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
@@ -559,6 +591,7 @@ static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 	query_memregion_info,
 	query_hwconfig_blob,
 	query_geometry_subslices,
+	query_guc_submission_version,
 };
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
...
@@ -75,13 +75,10 @@ struct i915_syncmap {
 	unsigned int height;
 	unsigned int bitmap;
 	struct i915_syncmap *parent;
-	/*
-	 * Following this header is an array of either seqno or child pointers:
-	 * union {
-	 *	u32 seqno[KSYNCMAP];
-	 *	struct i915_syncmap *child[KSYNCMAP];
-	 * };
-	 */
+	union {
+		DECLARE_FLEX_ARRAY(u32, seqno);
+		DECLARE_FLEX_ARRAY(struct i915_syncmap *, child);
+	};
 };
 /**
@@ -99,13 +96,13 @@ void i915_syncmap_init(struct i915_syncmap **root)
 static inline u32 *__sync_seqno(struct i915_syncmap *p)
 {
 	GEM_BUG_ON(p->height);
-	return (u32 *)(p + 1);
+	return p->seqno;
 }
 static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
 {
 	GEM_BUG_ON(!p->height);
-	return (struct i915_syncmap **)(p + 1);
+	return p->child;
 }
 static inline unsigned int
@@ -200,7 +197,7 @@ __sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
 {
 	struct i915_syncmap *p;
-	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
+	p = kmalloc(struct_size(p, seqno, KSYNCMAP), GFP_KERNEL);
 	if (unlikely(!p))
 		return NULL;
@@ -282,7 +279,7 @@ static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
 		unsigned int above;
 		/* Insert a join above the current layer */
-		next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
+		next = kzalloc(struct_size(next, child, KSYNCMAP),
 			       GFP_KERNEL);
 		if (unlikely(!next))
 			return -ENOMEM;
...
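Aside (illustration, not from the patch): the i915_syncmap conversion above replaces the "payload follows the header" convention with real flexible-array members, so the data is reached through a named member and the allocation size comes from struct_size(). A plain-C userspace analogue of the same idea (one flex member only; the kernel's DECLARE_FLEX_ARRAY additionally allows two of them inside a union):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define KSYNCMAP 16

	struct syncmap_demo {
		unsigned int height;
		unsigned int bitmap;
		struct syncmap_demo *parent;
		uint32_t seqno[];	/* flexible array member, like DECLARE_FLEX_ARRAY(u32, seqno) */
	};

	int main(void)
	{
		/* userspace equivalent of struct_size(p, seqno, KSYNCMAP) */
		size_t sz = offsetof(struct syncmap_demo, seqno) + KSYNCMAP * sizeof(uint32_t);
		struct syncmap_demo *p = calloc(1, sz);

		if (!p)
			return 1;

		/* payload is reached through the member, no (u32 *)(p + 1) cast needed */
		p->seqno[KSYNCMAP - 1] = 42;
		printf("allocated %zu bytes, last seqno = %u\n", sz, p->seqno[KSYNCMAP - 1]);
		free(p);
		return 0;
	}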
@@ -1800,7 +1800,10 @@ static const struct intel_forcewake_range __mtl_fw_ranges[] = {
 	GEN_FW_RANGE(0x24000, 0x2ffff, 0), /*
 		0x24000 - 0x2407f: always on
 		0x24080 - 0x2ffff: reserved */
-	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT)
+	GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT),
+	GEN_FW_RANGE(0x40000, 0x1901ef, 0),
+	GEN_FW_RANGE(0x1901f0, 0x1901f3, FORCEWAKE_GT)
+		/* FIXME: WA to wake GT while triggering H2G */
 };
 /*
...
@@ -3013,6 +3013,7 @@ struct drm_i915_query_item {
 	 * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
 	 * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
 	 * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+	 * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
 	 */
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO		1
@@ -3021,6 +3022,7 @@ struct drm_i915_query_item {
 #define DRM_I915_QUERY_MEMORY_REGIONS		4
 #define DRM_I915_QUERY_HWCONFIG_BLOB		5
 #define DRM_I915_QUERY_GEOMETRY_SUBSLICES	6
+#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION	7
 /* Must be kept compact -- no holes and well documented */
 /**
@@ -3566,6 +3568,16 @@ struct drm_i915_query_memory_regions {
 	struct drm_i915_memory_region_info regions[];
 };
+/**
+ * struct drm_i915_query_guc_submission_version - query GuC submission interface version
+ */
+struct drm_i915_query_guc_submission_version {
+	__u32 branch;
+	__u32 major;
+	__u32 minor;
+	__u32 patch;
+};
 /**
  * DOC: GuC HWCONFIG blob uAPI
  *
...
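Usage note (illustration, not part of the patch): userspace reads the new item through the existing two-struct DRM_IOCTL_I915_QUERY flow, a drm_i915_query pointing at one drm_i915_query_item whose data_ptr references a zeroed drm_i915_query_guc_submission_version. A hedged sketch, assuming a uAPI header that already carries the new define and an i915-driven render node at /dev/dri/renderD128:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <drm/i915_drm.h>

	int main(void)
	{
		int fd = open("/dev/dri/renderD128", O_RDWR);	/* assumption: this node is an i915 device */
		if (fd < 0)
			return 1;

		struct drm_i915_query_guc_submission_version ver;
		memset(&ver, 0, sizeof(ver));			/* all fields must be zero on input */

		struct drm_i915_query_item item = {
			.query_id = DRM_I915_QUERY_GUC_SUBMISSION_VERSION,
			.length = sizeof(ver),
			.data_ptr = (uintptr_t)&ver,
		};
		struct drm_i915_query query = {
			.num_items = 1,
			.items_ptr = (uintptr_t)&item,
		};

		/* per-item errors come back as a negative item.length */
		if (ioctl(fd, DRM_IOCTL_I915_QUERY, &query) || item.length < 0) {
			close(fd);
			return 1;
		}

		printf("GuC submission interface %u.%u.%u (branch %u)\n",
		       ver.major, ver.minor, ver.patch, ver.branch);
		close(fd);
		return 0;
	}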