Commit a4172af3 authored by Dave Airlie

Merge tag 'drm-xe-next-2024-07-30' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

drm-xe-next for 6.12

UAPI Changes:
- Rename xe perf layer as xe observation layer; this was also
  made available via fixes to the previous version (Ashutosh)
- Use write-back caching mode for system memory on DGFX; this was
  also made available via fixes to the previous version (Thomas)
- Expose SIMD16 EU mask in the topology query so userspace can tell
  the EU type, as available in PVC, Lunar Lake and Battlemage
  (Lucas)
- Return ENOBUFS instead of ENOMEM in vm_bind if the failure is tied
  to an array of binds (Matthew Brost); see the sketch below
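
A minimal userspace sketch of handling the new return code follows; it is
illustrative only and assumes the drm_xe_vm_bind/drm_xe_vm_bind_op layout
from this series' uAPI headers (header path and field names are assumptions,
not taken from this merge):

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include "xe_drm.h"

/*
 * Submit an array of binds; on ENOBUFS the array itself was rejected, so
 * fall back to one bind op per ioctl.  ENOMEM still means a real
 * allocation failure.  Field names are assumptions based on the xe uAPI.
 */
static int submit_binds(int fd, struct drm_xe_vm_bind *bind,
			struct drm_xe_vm_bind_op *ops, uint32_t n)
{
	bind->num_binds = n;
	bind->vector_of_binds = (uintptr_t)ops;

	if (!drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, bind))
		return 0;

	if (errno != ENOBUFS)
		return -errno;

	for (uint32_t i = 0; i < n; i++) {
		bind->num_binds = 1;
		bind->bind = ops[i];
		if (drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, bind))
			return -errno;
	}
	return 0;
}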

Driver Changes:
- Log cleanup moving messages to debug priority (Michal Wajdeczko)
- Add timeout to fences to adhere to dma_buf rules (Matthew Brost)
- Rename old engine nomenclature to exec_queue (Matthew Brost)
- Convert multiple bind ops to 1 job (Matthew Brost)
- Add error injection for vm bind to help testing error path
  (Matthew Brost)
- Fix error handling in page table to propagate correctly
  to userspace (Matthew Brost)
- Re-organize and cleanup SR-IOV related registers (Michal Wajdeczko)
- Make the device write barrier compatible with VF (Michal Wajdeczko)
- New display workarounds for Battlemage (Matthew Auld)
- New media workarounds for Lunar Lake and Battlemage (Ngai-Mint Kwan)
- New graphics workarounds for Lunar Lake (Bommu Krishnaiah)
- Tracepoint updates (Matthew Brost, Nirmoy Das)
- Cleanup the header generation for OOB workarounds (Lucas De Marchi)
- Fix leaking HDCP-related object (Nirmoy Das)
- Serialize L2 flushes to avoid races (Tejas Upadhyay)
- Log pid and comm on job timeout (José Roberto de Souza)
- Simplify boilerplate code for live kunit (Michal Wajdeczko)
- Improve kunit skips for live kunit (Michal Wajdeczko)
- Fix xe_sync cleanup when handling xe_exec ioctl (Ashutosh Dixit)
- Limit fair VF LMEM provisioning (Michal Wajdeczko)
- New workaround to fence mmio writes in Lunar Lake (Tejas Upadhyay)
- Warn on writes to inaccessible registers in VF (Michal Wajdeczko)
- Fix register lookup in VF (Michal Wajdeczko)
- Add GSC support for Battlemage (Alexander Usyskin)
- Fix wedging only the GT in which the timeout occurred (Matthew Brost)
- Block device suspend when wedging (Matthew Brost)
- Handle compression and migration changes for Battlemage
  (Akshata Jahagirdar)
- Limit access of stolen memory for Lunar Lake (Uma Shankar)
- Fail invalid addresses during user fence creation (Matthew Brost)
- Refcount xe_file to safely and accurately store fdinfo stats
  (Umesh Nerlige Ramappa)
- Cleanup and fix PM reference for TLB invalidation code
  (Matthew Brost)
- Fix PM reference handling when communicating with GuC (Matthew Brost)
- Add new BO flag for 2 MiB alignment and use it in VF (Michal Wajdeczko)
- Simplify MMIO setup for multi-tile platforms (Lucas De Marchi)
- Add check for uninitialized access to OOB workarounds
  (Lucas De Marchi)
- New GSC and HuC firmware blobs for Lunar Lake and Battlemage
  (Daniele Ceraolo Spurio)
- Unify mmio wait logic (Gustavo Sousa)
- Fix off-by-one when processing RTP rules (Lucas De Marchi)
- Future-proof migrate logic with compressed PAT flag (Matt Roper)
- Add WA kunit tests for Battlemage (Lucas De Marchi)
- Test active tracking for workarounds with kunit (Lucas De Marchi)
- Add kunit tests for RTP with no actions (Lucas De Marchi)
- Unify parse of OR rules in RTP (Lucas De Marchi)
- Add performance tuning for Battlemage (Sai Teja Pottumuttu)
- Make bit masks unsigned (Geert Uytterhoeven)
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/k7xuktfav4zmtxxjr77glu2hszypvzgmzghoumh757nqfnk7kn@ccfi4ts3ytbk
parents de9c2c66 f2881dfd
......@@ -6,8 +6,16 @@
#ifndef __INTEL_DISPLAY_WA_H__
#define __INTEL_DISPLAY_WA_H__
#include <linux/types.h>
struct drm_i915_private;
void intel_display_wa_apply(struct drm_i915_private *i915);
#ifdef I915
static inline bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915) { return false; }
#else
bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915);
#endif
#endif
......@@ -56,6 +56,7 @@
#include "intel_display_device.h"
#include "intel_display_trace.h"
#include "intel_display_types.h"
#include "intel_display_wa.h"
#include "intel_fbc.h"
#include "intel_fbc_regs.h"
#include "intel_frontbuffer.h"
......@@ -1237,6 +1238,11 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
return 0;
}
if (intel_display_needs_wa_16023588340(i915)) {
plane_state->no_fbc_reason = "Wa_16023588340";
return 0;
}
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
if (i915_vtd_active(i915) && (IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
plane_state->no_fbc_reason = "VT-d enabled";
......
......@@ -12,32 +12,15 @@ subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
subdir-ccflags-y += -I$(obj) -I$(src)
# generated sources
hostprogs := xe_gen_wa_oob
hostprogs := xe_gen_wa_oob
generated_oob := $(obj)/generated/xe_wa_oob.c $(obj)/generated/xe_wa_oob.h
quiet_cmd_wa_oob = GEN $(notdir $(generated_oob))
cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
$(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
$(src)/xe_wa_oob.rules
$(call cmd,wa_oob)
uses_generated_oob := \
$(obj)/xe_ggtt.o \
$(obj)/xe_gsc.o \
$(obj)/xe_gt.o \
$(obj)/xe_guc.o \
$(obj)/xe_guc_ads.o \
$(obj)/xe_guc_pc.o \
$(obj)/xe_migrate.o \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
$(obj)/xe_wa.o \
$(obj)/xe_ttm_stolen_mgr.o
$(uses_generated_oob): $(generated_oob)
# Please keep these build lists sorted!
# core driver code
......@@ -192,6 +175,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/xe_display.o \
display/xe_display_misc.o \
display/xe_display_rps.o \
display/xe_display_wa.o \
display/xe_dsb_buffer.o \
display/xe_fb_pin.o \
display/xe_hdcp_gsc.o \
......@@ -320,3 +304,6 @@ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
$(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)
uses_generated_oob := $(addprefix $(obj)/, $(xe-y))
$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h
......@@ -10,6 +10,9 @@
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_wa.h"
#include <generated/xe_wa_oob.h>
struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
......@@ -37,7 +40,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
size = PAGE_ALIGN(size);
obj = ERR_PTR(-ENODEV);
if (!IS_DGFX(xe)) {
if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
......@@ -48,6 +51,7 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
else
drm_info(&xe->drm, "Allocated fbdev into stolen failed: %li\n", PTR_ERR(obj));
}
if (IS_ERR(obj)) {
obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
......
// SPDX-License-Identifier: MIT
/*
* Copyright © 2024 Intel Corporation
*/
#include "intel_display_wa.h"
#include "xe_device.h"
#include "xe_wa.h"
#include <generated/xe_wa_oob.h>
bool intel_display_needs_wa_16023588340(struct drm_i915_private *i915)
{
return XE_WA(xe_root_mmio_gt(i915), 16023588340);
}
......@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_gt.h"
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
......@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
......@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
......
......@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
......@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
/* Ensure DPT writes are flushed */
xe_device_l2_flush(xe);
return vma;
err_unpin:
......
......@@ -18,6 +18,9 @@
#include "intel_frontbuffer.h"
#include "intel_plane_initial.h"
#include "xe_bo.h"
#include "xe_wa.h"
#include <generated/xe_wa_oob.h>
static bool
intel_reuse_initial_plane_obj(struct intel_crtc *this,
......@@ -104,6 +107,9 @@ initial_plane_bo(struct xe_device *xe,
phys_base = base;
flags |= XE_BO_FLAG_STOLEN;
if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display))
return NULL;
/*
* If the FB is too big, just don't use it since fbdev is not very
* important and we should probably use that space with FBC or other
......
......@@ -80,6 +80,9 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
#define CG_DIS_CNTLBUS REG_BIT(6)
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
......@@ -88,6 +91,8 @@
#define VE1_AUX_INV XE_REG(0x42b8)
#define AUX_INV REG_BIT(0)
#define XE2_LMEM_CFG XE_REG(0x48b0)
#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4)
#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910)
#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8)
......@@ -103,6 +108,7 @@
#define FF_MODE XE_REG_MCR(0x6210)
#define DIS_TE_AUTOSTRIP REG_BIT(31)
#define VS_HIT_MAX_VALUE_MASK REG_GENMASK(25, 20)
#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16)
#define DIS_MESH_AUTOSTRIP REG_BIT(15)
......@@ -372,6 +378,11 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
#define XE2_GLOBAL_INVAL XE_REG(0xb404)
#define SCRATCH1LPFC XE_REG(0xb474)
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
......@@ -395,6 +406,10 @@
#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
#define GLOBAL_INVALIDATION_MODE REG_BIT(2)
#define LMEM_CFG XE_REG(0xcf58)
#define LMEM_EN REG_BIT(31)
#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0)
......
......@@ -15,8 +15,6 @@
#define GU_MISC_IRQ_OFFSET 0x444f0
#define GU_MISC_GSE REG_BIT(27)
#define SOFTWARE_FLAGS_SPR33 XE_REG(0x4f084)
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
......@@ -24,11 +22,14 @@
#define LMEM_INIT REG_BIT(7)
#define DRIVERFLR REG_BIT(31)
#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014)
#define SGSI_SIDECLK_DIS REG_BIT(17)
#define GU_DEBUG XE_REG(0x101018)
#define DRIVERFLR_STATUS REG_BIT(31)
#define XEHP_CLOCK_GATE_DIS XE_REG(0x101014)
#define SGSI_SIDECLK_DIS REG_BIT(17)
#define VIRTUAL_CTRL_REG XE_REG(0x10108c)
#define GUEST_GTT_UPDATE_EN REG_BIT(8)
#define XEHP_MTCFG_ADDR XE_REG(0x101800)
#define TILE_COUNT REG_GENMASK(15, 8)
......@@ -66,6 +67,9 @@
#define DISPLAY_IRQ REG_BIT(16)
#define GT_DW_IRQ(x) REG_BIT(x)
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
#define PVC_RP_STATE_CAP XE_REG(0x281014)
#endif
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _REGS_XE_SRIOV_REGS_H_
#define _REGS_XE_SRIOV_REGS_H_
#include "regs/xe_reg_defs.h"
#define XE2_LMEM_CFG XE_REG(0x48b0)
#define LMEM_CFG XE_REG(0xcf58)
#define LMEM_EN REG_BIT(31)
#define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */
#define VIRTUAL_CTRL_REG XE_REG(0x10108c)
#define GUEST_GTT_UPDATE_EN REG_BIT(8)
#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF)
#define VF_CAP REG_BIT(0)
#endif
......@@ -2,11 +2,7 @@
# "live" kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o
xe_live_test-y = xe_live_test_mod.o \
xe_bo_test.o \
xe_dma_buf_test.o \
xe_migrate_test.o \
xe_mocs_test.o
xe_live_test-y = xe_live_test_mod.o
# Normal kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
......
......@@ -6,7 +6,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
#include "tests/xe_bo_test.h"
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
......@@ -154,12 +154,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
static int ccs_test_run_device(struct xe_device *xe)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
struct xe_tile *tile;
int id;
if (!xe_device_has_flat_ccs(xe)) {
kunit_info(test, "Skipping non-flat-ccs device.\n");
kunit_skip(test, "non-flat-ccs device\n");
return 0;
}
/* For xe2+ dgfx, we don't handle ccs metadata */
if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) {
kunit_skip(test, "xe2+ dgfx device\n");
return 0;
}
......@@ -177,11 +183,12 @@ static int ccs_test_run_device(struct xe_device *xe)
return 0;
}
void xe_ccs_migrate_kunit(struct kunit *test)
static void xe_ccs_migrate_kunit(struct kunit *test)
{
xe_call_for_each_device(ccs_test_run_device);
struct xe_device *xe = test->priv;
ccs_test_run_device(xe);
}
EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
{
......@@ -325,13 +332,12 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
static int evict_test_run_device(struct xe_device *xe)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
struct xe_tile *tile;
int id;
if (!IS_DGFX(xe)) {
kunit_info(test, "Skipping non-discrete device %s.\n",
dev_name(xe->drm.dev));
kunit_skip(test, "non-discrete device\n");
return 0;
}
......@@ -345,8 +351,23 @@ static int evict_test_run_device(struct xe_device *xe)
return 0;
}
void xe_bo_evict_kunit(struct kunit *test)
static void xe_bo_evict_kunit(struct kunit *test)
{
xe_call_for_each_device(evict_test_run_device);
struct xe_device *xe = test->priv;
evict_test_run_device(xe);
}
EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
{}
};
VISIBLE_IF_KUNIT
struct kunit_suite xe_bo_test_suite = {
.name = "xe_bo",
.test_cases = xe_bo_tests,
.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_bo_test_suite);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_bo_test.h"
#include <kunit/test.h>
static struct kunit_case xe_bo_tests[] = {
KUNIT_CASE(xe_ccs_migrate_kunit),
KUNIT_CASE(xe_bo_evict_kunit),
{}
};
static struct kunit_suite xe_bo_test_suite = {
.name = "xe_bo",
.test_cases = xe_bo_tests,
};
kunit_test_suite(xe_bo_test_suite);
/* SPDX-License-Identifier: GPL-2.0 AND MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _XE_BO_TEST_H_
#define _XE_BO_TEST_H_
struct kunit;
void xe_ccs_migrate_kunit(struct kunit *test);
void xe_bo_evict_kunit(struct kunit *test);
#endif
......@@ -8,7 +8,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
#include "tests/xe_dma_buf_test.h"
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "xe_pci.h"
......@@ -107,7 +107,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
struct drm_gem_object *import;
struct dma_buf *dmabuf;
......@@ -258,7 +258,7 @@ static const struct dma_buf_test_params test_params[] = {
static int dma_buf_run_device(struct xe_device *xe)
{
const struct dma_buf_test_params *params;
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
xe_pm_runtime_get(xe);
for (params = test_params; params->mem_mask; ++params) {
......@@ -274,8 +274,22 @@ static int dma_buf_run_device(struct xe_device *xe)
return 0;
}
void xe_dma_buf_kunit(struct kunit *test)
static void xe_dma_buf_kunit(struct kunit *test)
{
xe_call_for_each_device(dma_buf_run_device);
struct xe_device *xe = test->priv;
dma_buf_run_device(xe);
}
EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_kunit);
static struct kunit_case xe_dma_buf_tests[] = {
KUNIT_CASE_PARAM(xe_dma_buf_kunit, xe_pci_live_device_gen_param),
{}
};
VISIBLE_IF_KUNIT
struct kunit_suite xe_dma_buf_test_suite = {
.name = "xe_dma_buf",
.test_cases = xe_dma_buf_tests,
.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_dma_buf_test_suite);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_dma_buf_test.h"
#include <kunit/test.h>
static struct kunit_case xe_dma_buf_tests[] = {
KUNIT_CASE(xe_dma_buf_kunit),
{}
};
static struct kunit_suite xe_dma_buf_test_suite = {
.name = "xe_dma_buf",
.test_cases = xe_dma_buf_tests,
};
kunit_test_suite(xe_dma_buf_test_suite);
/* SPDX-License-Identifier: GPL-2.0 AND MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _XE_DMA_BUF_TEST_H_
#define _XE_DMA_BUF_TEST_H_
struct kunit;
void xe_dma_buf_kunit(struct kunit *test);
#endif
......@@ -12,7 +12,9 @@
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_pm.h"
/**
* xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test.
......@@ -88,3 +90,40 @@ int xe_kunit_helper_xe_device_test_init(struct kunit *test)
return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init);
KUNIT_DEFINE_ACTION_WRAPPER(put_xe_pm_runtime, xe_pm_runtime_put, struct xe_device *);
/**
* xe_kunit_helper_xe_device_live_test_init - Prepare a &xe_device for
* use in a live KUnit test.
* @test: the &kunit where live &xe_device will be used
*
* This function expects a pointer to the &xe_device in &test.param_value,
* as prepared by &xe_pci_live_device_gen_param, and stores that pointer
* as &kunit.priv to allow the test code to access it.
*
* This function makes sure that the device is not wedged and then resumes
* it, so the device does not need to be woken up in the middle of the test.
* It uses a deferred cleanup action to release the runtime_pm reference.
*
* This function can be used as custom implementation of &kunit_suite.init.
*
* This function uses KUNIT_ASSERT to detect any failures.
*
* Return: Always 0.
*/
int xe_kunit_helper_xe_device_live_test_init(struct kunit *test)
{
struct xe_device *xe = xe_device_const_cast(test->param_value);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
kunit_info(test, "running on %s device\n", xe->info.platform_name);
KUNIT_ASSERT_FALSE(test, xe_device_wedged(xe));
xe_pm_runtime_get(xe);
KUNIT_ASSERT_EQ(test, 0, kunit_add_action_or_reset(test, put_xe_pm_runtime, xe));
test->priv = xe;
return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_live_test_init);
......@@ -14,4 +14,6 @@ struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
struct device *dev);
int xe_kunit_helper_xe_device_test_init(struct kunit *test);
int xe_kunit_helper_xe_device_live_test_init(struct kunit *test);
#endif
......@@ -3,6 +3,17 @@
* Copyright © 2023 Intel Corporation
*/
#include <linux/module.h>
#include <kunit/test.h>
extern struct kunit_suite xe_bo_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
kunit_test_suite(xe_bo_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_migrate_test.h"
#include <kunit/test.h>
static struct kunit_case xe_migrate_tests[] = {
KUNIT_CASE(xe_migrate_sanity_kunit),
{}
};
static struct kunit_suite xe_migrate_test_suite = {
.name = "xe_migrate",
.test_cases = xe_migrate_tests,
};
kunit_test_suite(xe_migrate_test_suite);
/* SPDX-License-Identifier: GPL-2.0 AND MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _XE_MIGRATE_TEST_H_
#define _XE_MIGRATE_TEST_H_
struct kunit;
void xe_migrate_sanity_kunit(struct kunit *test);
#endif
......@@ -6,7 +6,7 @@
#include <kunit/test.h>
#include <kunit/visibility.h>
#include "tests/xe_mocs_test.h"
#include "tests/xe_kunit_helpers.h"
#include "tests/xe_pci_test.h"
#include "tests/xe_test.h"
......@@ -23,7 +23,7 @@ struct live_mocs {
static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
{
unsigned int flags;
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
memset(arg, 0, sizeof(*arg));
......@@ -41,7 +41,7 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
static void read_l3cc_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
u32 l3cc, l3cc_expected;
unsigned int i;
u32 reg_val;
......@@ -78,7 +78,7 @@ static void read_l3cc_table(struct xe_gt *gt,
static void read_mocs_table(struct xe_gt *gt,
const struct xe_mocs_info *info)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
u32 mocs, mocs_expected;
unsigned int i;
u32 reg_val;
......@@ -134,11 +134,15 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
return 0;
}
void xe_live_mocs_kernel_kunit(struct kunit *test)
static void xe_live_mocs_kernel_kunit(struct kunit *test)
{
xe_call_for_each_device(mocs_kernel_test_run_device);
struct xe_device *xe = test->priv;
if (IS_SRIOV_VF(xe))
kunit_skip(test, "this test is N/A for VF");
mocs_kernel_test_run_device(xe);
}
EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
static int mocs_reset_test_run_device(struct xe_device *xe)
{
......@@ -148,7 +152,7 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
struct xe_gt *gt;
unsigned int flags;
int id;
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
xe_pm_runtime_get(xe);
......@@ -175,8 +179,26 @@ static int mocs_reset_test_run_device(struct xe_device *xe)
return 0;
}
void xe_live_mocs_reset_kunit(struct kunit *test)
static void xe_live_mocs_reset_kunit(struct kunit *test)
{
xe_call_for_each_device(mocs_reset_test_run_device);
struct xe_device *xe = test->priv;
if (IS_SRIOV_VF(xe))
kunit_skip(test, "this test is N/A for VF");
mocs_reset_test_run_device(xe);
}
EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
static struct kunit_case xe_mocs_tests[] = {
KUNIT_CASE_PARAM(xe_live_mocs_kernel_kunit, xe_pci_live_device_gen_param),
KUNIT_CASE_PARAM(xe_live_mocs_reset_kunit, xe_pci_live_device_gen_param),
{}
};
VISIBLE_IF_KUNIT
struct kunit_suite xe_mocs_test_suite = {
.name = "xe_mocs",
.test_cases = xe_mocs_tests,
.init = xe_kunit_helper_xe_device_live_test_init,
};
EXPORT_SYMBOL_IF_KUNIT(xe_mocs_test_suite);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright © 2022 Intel Corporation
*/
#include "xe_mocs_test.h"
#include <kunit/test.h>
static struct kunit_case xe_mocs_tests[] = {
KUNIT_CASE(xe_live_mocs_kernel_kunit),
KUNIT_CASE(xe_live_mocs_reset_kunit),
{}
};
static struct kunit_suite xe_mocs_test_suite = {
.name = "xe_mocs",
.test_cases = xe_mocs_tests,
};
kunit_test_suite(xe_mocs_test_suite);
/* SPDX-License-Identifier: GPL-2.0 AND MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef _XE_MOCS_TEST_H_
#define _XE_MOCS_TEST_H_
struct kunit;
void xe_live_mocs_kernel_kunit(struct kunit *test);
void xe_live_mocs_reset_kunit(struct kunit *test);
#endif
......@@ -167,3 +167,33 @@ int xe_pci_fake_device_init(struct xe_device *xe)
return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
/**
* xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
* @prev: the previously returned value, or NULL for the first iteration
* @desc: the buffer for a parameter name
*
* Iterates over the available Xe devices on the system. Uses the device name
* as the parameter name.
*
* To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
*
* Return: pointer to the next &struct xe_device ready to be used as a parameter
* or NULL if there are no more Xe devices on the system.
*/
const void *xe_pci_live_device_gen_param(const void *prev, char *desc)
{
const struct xe_device *xe = prev;
struct device *dev = xe ? xe->drm.dev : NULL;
struct device *next;
next = driver_find_next_device(&xe_pci_driver.driver, dev);
if (dev)
put_device(dev);
if (!next)
return NULL;
snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s", dev_name(next));
return pdev_to_xe_device(to_pci_dev(next));
}
EXPORT_SYMBOL_IF_KUNIT(xe_pci_live_device_gen_param);
......@@ -16,7 +16,7 @@
static void check_graphics_ip(const struct xe_graphics_desc *graphics)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
u64 mask = graphics->hw_engine_mask;
/* RCS, CCS, and BCS engines are allowed on the graphics IP */
......@@ -30,7 +30,7 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics)
static void check_media_ip(const struct xe_media_desc *media)
{
struct kunit *test = xe_cur_kunit();
struct kunit *test = kunit_get_current_test();
u64 mask = media->hw_engine_mask;
/* VCS, VECS and GSCCS engines are allowed on the media IP */
......
......@@ -35,4 +35,6 @@ struct xe_pci_fake_data {
int xe_pci_fake_device_init(struct xe_device *xe);
const void *xe_pci_live_device_gen_param(const void *prev, char *desc);
#endif
......@@ -9,8 +9,8 @@
#include <linux/types.h>
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include <linux/sched.h>
#include <kunit/test.h>
#include <kunit/test-bug.h>
/*
* Each test that provides a kunit private test structure, place a test id
......@@ -31,8 +31,6 @@ struct xe_test_priv {
#define XE_TEST_DECLARE(x) x
#define XE_TEST_ONLY(x) unlikely(x)
#define XE_TEST_EXPORT
#define xe_cur_kunit() current->kunit_test
/**
* xe_cur_kunit_priv - Obtain the struct xe_test_priv pointed to by
......@@ -48,10 +46,10 @@ xe_cur_kunit_priv(enum xe_test_priv_id id)
{
struct xe_test_priv *priv;
if (!xe_cur_kunit())
if (!kunit_get_current_test())
return NULL;
priv = xe_cur_kunit()->priv;
priv = kunit_get_current_test()->priv;
return priv->id == id ? priv : NULL;
}
......@@ -59,8 +57,6 @@ xe_cur_kunit_priv(enum xe_test_priv_id id)
#define XE_TEST_DECLARE(x)
#define XE_TEST_ONLY(x) 0
#define XE_TEST_EXPORT static
#define xe_cur_kunit() NULL
#define xe_cur_kunit_priv(_id) NULL
#endif
......
......@@ -74,6 +74,7 @@ static const struct platform_test_case cases[] = {
GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
};
static void platform_desc(const struct platform_test_case *t, char *desc)
......
......@@ -1264,13 +1264,14 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
!(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
(flags & XE_BO_NEEDS_64K))) {
aligned_size = ALIGN(size, SZ_64K);
(flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
aligned_size = ALIGN(size, align);
if (type != ttm_bo_type_device)
size = ALIGN(size, SZ_64K);
size = ALIGN(size, align);
flags |= XE_BO_FLAG_INTERNAL_64K;
alignment = SZ_64K >> PAGE_SHIFT;
alignment = align >> PAGE_SHIFT;
} else {
aligned_size = ALIGN(size, SZ_4K);
flags &= ~XE_BO_FLAG_INTERNAL_64K;
......
......@@ -36,8 +36,9 @@
#define XE_BO_FLAG_PAGETABLE BIT(12)
#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13)
#define XE_BO_FLAG_NEEDS_UC BIT(14)
#define XE_BO_NEEDS_64K BIT(15)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16)
#define XE_BO_FLAG_NEEDS_64K BIT(15)
#define XE_BO_FLAG_NEEDS_2M BIT(16)
#define XE_BO_FLAG_GGTT_INVALIDATE BIT(17)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
#define XE_BO_FLAG_INTERNAL_64K BIT(31)
......
......@@ -58,6 +58,8 @@ struct xe_bo {
#endif
/** @freed: List node for delayed put. */
struct llist_node freed;
/** @update_index: Update index if PT BO */
int update_index;
/** @created: Whether the bo has passed initial creation */
bool created;
......
......@@ -171,7 +171,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
u32 adj_logical_mask = q->logical_mask;
u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
struct task_struct *task = NULL;
int i;
bool cookie;
......@@ -179,14 +178,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
if (q->vm && q->vm->xef) {
task = get_pid_task(q->vm->xef->drm->pid, PIDTYPE_PID);
if (task)
process_name = task->comm;
}
if (q->vm && q->vm->xef)
process_name = q->vm->xef->process_name;
strscpy(ss->process_name, process_name);
if (task)
put_task_struct(task);
ss->gt = q->gt;
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
......
......@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
......@@ -61,6 +64,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
struct xe_drm_client *client;
struct xe_file *xef;
int ret = -ENOMEM;
struct task_struct *task = NULL;
xef = kzalloc(sizeof(*xef), GFP_KERNEL);
if (!xef)
......@@ -87,9 +91,63 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
spin_unlock(&xe->clients.lock);
file->driver_priv = xef;
kref_init(&xef->refcount);
task = get_pid_task(rcu_access_pointer(file->pid), PIDTYPE_PID);
if (task) {
xef->process_name = kstrdup(task->comm, GFP_KERNEL);
xef->pid = task->pid;
put_task_struct(task);
}
return 0;
}
static void xe_file_destroy(struct kref *ref)
{
struct xe_file *xef = container_of(ref, struct xe_file, refcount);
struct xe_device *xe = xef->xe;
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
xa_destroy(&xef->vm.xa);
mutex_destroy(&xef->vm.lock);
spin_lock(&xe->clients.lock);
xe->clients.count--;
spin_unlock(&xe->clients.lock);
xe_drm_client_put(xef->client);
kfree(xef->process_name);
kfree(xef);
}
/**
* xe_file_get() - Take a reference to the xe file object
* @xef: Pointer to the xe file
*
* Anyone with a pointer to xef must take a reference to the xe file
* object using this call.
*
* Return: xe file pointer
*/
struct xe_file *xe_file_get(struct xe_file *xef)
{
kref_get(&xef->refcount);
return xef;
}
/**
* xe_file_put() - Drop a reference to the xe file object
* @xef: Pointer to the xe file
*
* Used to drop a reference to the xef object
*/
void xe_file_put(struct xe_file *xef)
{
kref_put(&xef->refcount, xe_file_destroy);
}
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
struct xe_device *xe = to_xe_device(dev);
......@@ -98,6 +156,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
struct xe_exec_queue *q;
unsigned long idx;
xe_pm_runtime_get(xe);
/*
* No need for exec_queue.lock here as there is no contention for it
* when FD is closing as IOCTLs presumably can't be modifying the
......@@ -108,21 +168,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
mutex_lock(&xef->vm.lock);
xa_for_each(&xef->vm.xa, idx, vm)
xe_vm_close_and_put(vm);
mutex_unlock(&xef->vm.lock);
xa_destroy(&xef->vm.xa);
mutex_destroy(&xef->vm.lock);
spin_lock(&xe->clients.lock);
xe->clients.count--;
spin_unlock(&xe->clients.lock);
xe_file_put(xef);
xe_drm_client_put(xef->client);
kfree(xef);
xe_pm_runtime_put(xe);
}
static const struct drm_ioctl_desc xe_ioctls[] = {
......@@ -744,13 +797,22 @@ void xe_device_shutdown(struct xe_device *xe)
{
}
/**
* xe_device_wmb() - Device specific write memory barrier
* @xe: the &xe_device
*
* While wmb() is sufficient for a barrier if we use system memory, on discrete
* platforms with device memory we additionally need to issue a register write.
* Since it doesn't matter which register we write to, use the read-only VF_CAP
* register that is also marked as accessible by the VFs.
*/
void xe_device_wmb(struct xe_device *xe)
{
struct xe_gt *gt = xe_root_mmio_gt(xe);
wmb();
if (IS_DGFX(xe))
xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
xe_mmio_write32(gt, VF_CAP_REG, 0);
}
/**
......@@ -779,6 +841,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
xe_device_l2_flush(xe);
return;
}
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
......@@ -802,6 +869,30 @@ void xe_device_td_flush(struct xe_device *xe)
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
int err;
gt = xe_root_mmio_gt(xe);
if (!XE_WA(gt, 16023588340))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
return;
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
......
......@@ -20,6 +20,11 @@ static inline struct xe_device *pdev_to_xe_device(struct pci_dev *pdev)
return pci_get_drvdata(pdev);
}
static inline struct xe_device *xe_device_const_cast(const struct xe_device *xe)
{
return (struct xe_device *)xe;
}
static inline struct xe_device *ttm_to_xe_device(struct ttm_device *ttm)
{
return container_of(ttm, struct xe_device, ttm);
......@@ -162,6 +167,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
void xe_device_l2_flush(struct xe_device *xe);
static inline bool xe_device_wedged(struct xe_device *xe)
{
......@@ -170,4 +176,7 @@ static inline bool xe_device_wedged(struct xe_device *xe)
void xe_device_declare_wedged(struct xe_device *xe);
struct xe_file *xe_file_get(struct xe_file *xef);
void xe_file_put(struct xe_file *xef);
#endif
......@@ -23,6 +23,10 @@
#include "xe_sriov_types.h"
#include "xe_step_types.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define TEST_VM_OPS_ERROR
#endif
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
#include "soc/intel_pch.h"
#include "intel_display_core.h"
......@@ -40,6 +44,7 @@ struct xe_pat_ops;
#define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
#define IS_DGFX(xe) ((xe)->info.is_dgfx)
#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi)
#define XE_VRAM_FLAGS_NEED64K BIT(0)
......@@ -285,6 +290,8 @@ struct xe_device {
u8 skip_pcode:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
/** @info.has_heci_cscfi: device has heci cscfi */
u8 has_heci_cscfi:1;
/** @info.skip_guc_pc: Skip GuC based PM feature init */
u8 skip_guc_pc:1;
/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
......@@ -477,6 +484,14 @@ struct xe_device {
int mode;
} wedged;
#ifdef TEST_VM_OPS_ERROR
/**
* @vm_inject_error_position: inject errors at different places in VM
* bind IOCTL based on this value
*/
u8 vm_inject_error_position;
#endif
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
......@@ -566,6 +581,21 @@ struct xe_file {
/** @client: drm client */
struct xe_drm_client *client;
/**
* @process_name: process name for file handle, used to safely output
* during error situations where xe file can outlive process
*/
char *process_name;
/**
* @pid: pid for file handle, used to safely output during error
* situations where xe file can outlive process
*/
pid_t pid;
/** @refcount: ref count of this xe file */
struct kref refcount;
};
#endif
......@@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
/* Accumulate all the exec queues from this client */
mutex_lock(&xef->exec_queue.lock);
xa_for_each(&xef->exec_queue.xa, i, q) {
xa_for_each(&xef->exec_queue.xa, i, q)
xe_exec_queue_update_run_ticks(q);
xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks;
q->old_run_ticks = q->run_ticks;
}
mutex_unlock(&xef->exec_queue.lock);
/* Get the total GPU cycles */
......
......@@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
if (q->vm)
xe_vm_put(q->vm);
if (q->xef)
xe_file_put(q->xef);
kfree(q);
}
......@@ -649,6 +653,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
goto kill_exec_queue;
args->exec_queue_id = id;
q->xef = xe_file_get(xef);
return 0;
......@@ -762,6 +767,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_file *xef;
struct xe_lrc *lrc;
u32 old_ts, new_ts;
......@@ -773,6 +779,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
if (!q->vm || !q->vm->xef)
return;
xef = q->vm->xef;
/*
* Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by
......@@ -783,7 +791,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
q->run_ticks += (new_ts - old_ts) * q->width;
xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
}
void xe_exec_queue_kill(struct xe_exec_queue *q)
......@@ -906,3 +914,26 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
}
/**
* xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
* @q: The exec queue
* @vm: The VM the engine does a bind or exec for
*
* Returns:
* -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
*/
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
{
struct dma_fence *fence;
int err = 0;
fence = xe_exec_queue_last_fence_get(q, vm);
if (fence) {
err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
0 : -ETIME;
dma_fence_put(fence);
}
return err;
}
......@@ -75,6 +75,8 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
struct xe_vm *vm);
void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
struct xe_vm *vm);
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
#endif
......@@ -38,6 +38,9 @@ enum xe_exec_queue_priority {
* a kernel object.
*/
struct xe_exec_queue {
/** @xef: Back pointer to xe file if this is user created exec queue */
struct xe_file *xef;
/** @gt: graphics tile this exec queue can submit to */
struct xe_gt *gt;
/**
......@@ -139,10 +142,6 @@ struct xe_exec_queue {
* Protected by @vm's resv. Unused if @vm == NULL.
*/
u64 tlb_flush_seqno;
/** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */
u64 old_run_ticks;
/** @run_ticks: hw engine class run time in ticks for this exec queue */
u64 run_ticks;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc *lrc[];
};
......@@ -172,9 +171,11 @@ struct xe_exec_queue_ops {
int (*suspend)(struct xe_exec_queue *q);
/**
* @suspend_wait: Wait for an exec queue to suspend executing, should be
* call after suspend.
* call after suspend. In dma-fencing path thus must return within a
* reasonable amount of time. -ETIME return shall indicate an error
* waiting for suspend resulting in associated VM getting killed.
*/
void (*suspend_wait)(struct xe_exec_queue *q);
int (*suspend_wait)(struct xe_exec_queue *q);
/**
* @resume: Resume exec queue execution, exec queue must be in a suspended
* state and dma fence returned from most recent suspend call must be
......
......@@ -422,10 +422,11 @@ static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
return 0;
}
static void execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
/* NIY */
return 0;
}
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
......
......@@ -97,19 +97,27 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
if (name) {
fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
/* Close previous entry before starting a new one */
if (idx)
fprintf(csource, ") },\n");
fprintf(csource, "{ XE_RTP_NAME(\"%s\"),\n XE_RTP_RULES(%s",
name, rules);
idx++;
} else {
fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
rules);
fprintf(csource, ", OR,\n\t%s", rules);
}
idx++;
lineno++;
if (!is_continuation)
prev_name = name;
}
/* Close last entry */
if (idx)
fprintf(csource, ") },\n");
fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
return 0;
......
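For reference, a sketch of the entry layout the updated generator now emits,
assuming a named workaround followed by an unnamed continuation line in
xe_wa_oob.rules (the name and rule macros below are illustrative, and the
whitespace is approximate):

/* generated/xe_wa_oob.c (illustrative excerpt): the OR'ed continuation
 * rule now lands inside the same XE_RTP_RULES() as the named entry
 * instead of producing a separate NULL-named row. */
{ XE_RTP_NAME("1234567890"),
 XE_RTP_RULES(GRAPHICS_VERSION(2001), OR,
	GRAPHICS_VERSION(2004)) },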
......@@ -9,6 +9,7 @@
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
#include "instructions/xe_gfxpipe_commands.h"
......@@ -95,6 +96,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
u32 reg;
int err;
if (!XE_WA(gt, 16023588340))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (WARN_ON(err))
return;
if (!xe_gt_is_media_type(gt)) {
xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
u32 reg;
int err;
if (!XE_WA(gt, 16023588340))
return;
if (xe_gt_is_media_type(gt))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (WARN_ON(err))
return;
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
/**
* xe_gt_remove() - Clean up the GT structures before driver removal
* @gt: the GT object
......@@ -111,6 +157,8 @@ void xe_gt_remove(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
xe_gt_disable_host_l2_vram(gt);
}
static void gt_reset_worker(struct work_struct *w);
......@@ -339,6 +387,7 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
xe_pcode_init(gt);
spin_lock_init(&gt->global_invl_lock);
return 0;
}
......@@ -508,6 +557,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
......@@ -643,6 +693,8 @@ static int do_gt_restart(struct xe_gt *gt)
xe_pat_init(gt);
xe_gt_enable_host_l2_vram(gt);
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
......@@ -796,6 +848,8 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_idle_disable_pg(gt);
xe_gt_disable_host_l2_vram(gt);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
......
......@@ -5,7 +5,7 @@
#include <drm/drm_managed.h>
#include "regs/xe_sriov_regs.h"
#include "regs/xe_regs.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_pf_config.h"
......
......@@ -1401,6 +1401,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
ALIGN(size, PAGE_SIZE),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_NEEDS_2M |
XE_BO_FLAG_PINNED);
if (IS_ERR(bo))
return PTR_ERR(bo);
......
......@@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr)
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key),
return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key),
vf_runtime_reg_cmp);
}
......@@ -892,6 +892,32 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
return rr->value;
}
/**
* xe_gt_sriov_vf_write32 - Handle a write to an inaccessible register.
* @gt: the &xe_gt
* @reg: the register to write
* @val: value to write
*
* This function is for VF use only.
* Currently it will trigger a WARN if running on a debug build.
*/
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
{
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
xe_gt_assert(gt, !reg.vf);
/*
* In the future, we may want to handle selected writes to inaccessible
* registers in some custom way, but for now let's just log a warning
* about such an attempt, as we are likely doing something wrong.
*/
xe_gt_WARN(gt, IS_ENABLED(CONFIG_DRM_XE_DEBUG),
"VF is trying to write %#x to an inaccessible register %#x+%#x\n",
val, reg.addr, addr - reg.addr);
}
/**
* xe_gt_sriov_vf_print_config - Print VF self config.
* @gt: the &xe_gt
......
......@@ -22,6 +22,7 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt);
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg);
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
......
......@@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
u64 start, u64 end, u32 asid);
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
bool stack);
void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence);
static inline void
xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
{
dma_fence_wait(&fence->base, false);
}
#endif /* _XE_GT_TLB_INVALIDATION_ */
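A minimal caller sketch for the stack-allocated fence API declared above,
assuming error handling that mirrors the converted call sites elsewhere in
this series:

/* Issue a ranged TLB invalidation and wait on a stack fence (sketch). */
struct xe_gt_tlb_invalidation_fence fence;
int err;

xe_gt_tlb_invalidation_fence_init(gt, &fence, true /* stack */);

err = xe_gt_tlb_invalidation_range(gt, &fence, start, end, asid);
if (err < 0)
	xe_gt_tlb_invalidation_fence_fini(&fence);
else
	xe_gt_tlb_invalidation_fence_wait(&fence);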
......@@ -8,6 +8,8 @@
#include <linux/dma-fence.h>
struct xe_gt;
/**
* struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
*
......@@ -17,6 +19,8 @@
struct xe_gt_tlb_invalidation_fence {
/** @base: dma fence base */
struct dma_fence base;
/** @gt: GT which fence belong to */
struct xe_gt *gt;
/** @link: link into list of pending tlb fences */
struct list_head link;
/** @seqno: seqno of TLB invalidation to signal fence one */
......
......@@ -6,6 +6,7 @@
#include "xe_gt_topology.h"
#include <linux/bitmap.h>
#include <linux/compiler.h>
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
......@@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
}
static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
struct xe_device *xe = gt_to_xe(gt);
u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
......@@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
if (GRAPHICS_VERx100(xe) < 1250)
reg_val = ~reg_val & XELP_EU_MASK;
/* On PVC, one bit = one EU */
if (GRAPHICS_VERx100(xe) == 1260) {
if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
/* SIMD16 EUs, one bit == one EU */
*eu_type = XE_GT_EU_TYPE_SIMD16;
val = reg_val;
} else {
/* All other platforms, one bit = 2 EU */
/* SIMD8 EUs, one bit == 2 EU */
*eu_type = XE_GT_EU_TYPE_SIMD8;
for (i = 0; i < fls(reg_val); i++)
if (reg_val & BIT(i))
val |= 0x3 << 2 * i;
......@@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt)
XEHP_GT_COMPUTE_DSS_ENABLE,
XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
XE2_GT_COMPUTE_DSS_2);
load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
......@@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt)
xe_gt_topology_dump(gt, &p);
}
static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
{
switch (eu_type) {
case XE_GT_EU_TYPE_SIMD16:
return "simd16";
case XE_GT_EU_TYPE_SIMD8:
return "simd8";
}
return NULL;
}
void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
......@@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS,
gt->fuse_topo.eu_mask_per_dss);
drm_printf(p, "EU type: %s\n",
eu_type_to_str(gt->fuse_topo.eu_type));
drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
gt->fuse_topo.l3_bank_mask);
......
......@@ -27,6 +27,11 @@ enum xe_gt_type {
XE_GT_TYPE_MEDIA,
};
enum xe_gt_eu_type {
XE_GT_EU_TYPE_SIMD8,
XE_GT_EU_TYPE_SIMD16,
};
#define XE_MAX_DSS_FUSE_REGS 3
#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
#define XE_MAX_EU_FUSE_REGS 1
......@@ -343,6 +348,12 @@ struct xe_gt {
/** @fuse_topo.l3_bank_mask: L3 bank mask */
xe_l3_bank_mask_t l3_bank_mask;
/**
* @fuse_topo.eu_type: type/width of EU stored in
* fuse_topo.eu_mask_per_dss
*/
enum xe_gt_eu_type eu_type;
} fuse_topo;
/** @steering: register steering for individual HW units */
......@@ -362,6 +373,12 @@ struct xe_gt {
*/
spinlock_t mcr_lock;
/**
* @global_invl_lock: protects the register for the duration
* of a global invalidation of l2 cache
*/
spinlock_t global_invl_lock;
/** @wa_active: keep track of active workarounds */
struct {
/** @wa_active.gt: bitmap with active GT workarounds */
......@@ -370,8 +387,14 @@ struct xe_gt {
unsigned long *engine;
/** @wa_active.lrc: bitmap with active LRC workarounds */
unsigned long *lrc;
/** @wa_active.oob: bitmap with active OOB workaroudns */
/** @wa_active.oob: bitmap with active OOB workarounds */
unsigned long *oob;
/**
* @wa_active.oob_initialized: mark oob as initialized to help
* detect misuse of XE_WA() - it can only be called during
* initialization, after OOB WAs have been processed
*/
bool oob_initialized;
} wa_active;
/** @user_engines: engines present in GT and available to userspace */
......
......@@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
state == XE_GUC_CT_STATE_STOPPED);
if (ct->g2h_outstanding)
xe_pm_runtime_put(ct_to_xe(ct));
ct->g2h_outstanding = 0;
ct->state = state;
......@@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
{
xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) ||
(g2h_len && num_g2h));
if (g2h_len) {
lockdep_assert_held(&ct->fast_lock);
if (!ct->g2h_outstanding)
xe_pm_runtime_get_noresume(ct_to_xe(ct));
ct->ctbs.g2h.info.space -= g2h_len;
ct->g2h_outstanding += num_g2h;
}
......@@ -509,9 +516,11 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
lockdep_assert_held(&ct->fast_lock);
xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding);
ct->ctbs.g2h.info.space += g2h_len;
--ct->g2h_outstanding;
if (!--ct->g2h_outstanding)
xe_pm_runtime_put(ct_to_xe(ct));
}
static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
......
......@@ -97,8 +97,8 @@ int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit)
if (ret)
return ret;
xe_gt_info(idm_to_gt(idm), "using %u GUC ID%s\n",
idm->total, str_plural(idm->total));
xe_gt_dbg(idm_to_gt(idm), "using %u GuC ID%s\n",
idm->total, str_plural(idm->total));
return 0;
}
......
......@@ -1071,7 +1071,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc *guc = exec_queue_to_guc(q);
const char *process_name = "no process";
int err = -ETIME;
pid_t pid = -1;
int i = 0;
bool wedged, skip_timeout_check;
......@@ -1168,9 +1170,14 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
goto sched_enable;
}
xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
if (q->vm && q->vm->xef) {
process_name = q->vm->xef->process_name;
pid = q->vm->xef->pid;
}
xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
q->guc->id, q->flags);
q->guc->id, q->flags, process_name, pid);
trace_xe_sched_job_timedout(job);
if (!exec_queue_killed(q))
......@@ -1312,6 +1319,15 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms
kfree(msg);
}
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
if (!q->guc->suspend_pending)
return;
WRITE_ONCE(q->guc->suspend_pending, false);
wake_up(&q->guc->suspend_wait);
}
static void suspend_fence_signal(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
......@@ -1321,9 +1337,7 @@ static void suspend_fence_signal(struct xe_exec_queue *q)
guc_read_stopped(guc));
xe_assert(xe, q->guc->suspend_pending);
q->guc->suspend_pending = false;
smp_wmb();
wake_up(&q->guc->suspend_wait);
__suspend_fence_signal(q);
}
static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
......@@ -1375,6 +1389,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
trace_xe_sched_msg_recv(msg);
switch (msg->opcode) {
......@@ -1393,6 +1409,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
default:
XE_WARN_ON("Unknown message type");
}
xe_pm_runtime_put(xe);
}
static const struct drm_sched_backend_ops drm_sched_ops = {
......@@ -1476,12 +1494,15 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
trace_xe_exec_queue_kill(q);
set_exec_queue_killed(q);
__suspend_fence_signal(q);
xe_guc_exec_queue_trigger_cleanup(q);
}
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
u32 opcode)
{
xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
INIT_LIST_HEAD(&msg->link);
msg->opcode = opcode;
msg->private_data = q;
......@@ -1572,12 +1593,31 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q)
return 0;
}
static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
int ret;
/*
* We likely don't need to check exec_queue_killed() as suspend_pending
* is cleared upon kill, but to be paranoid about races in which
* suspend_pending is set after the kill, also check for kill here.
*/
ret = wait_event_timeout(q->guc->suspend_wait,
!READ_ONCE(q->guc->suspend_pending) ||
exec_queue_killed(q) ||
guc_read_stopped(guc),
HZ * 5);
wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
guc_read_stopped(guc));
if (!ret) {
xe_gt_warn(guc_to_gt(guc),
"Suspend fence, guc_id=%d, failed to respond",
q->guc->id);
/* XXX: Trigger GT reset? */
return -ETIME;
}
return 0;
}
static void guc_exec_queue_resume(struct xe_exec_queue *q)
......
......@@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe)
{
struct xe_heci_gsc *heci_gsc = &xe->heci_gsc;
if (!HAS_HECI_GSCFI(xe))
if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
return;
if (heci_gsc->adev) {
......@@ -177,12 +177,14 @@ void xe_heci_gsc_init(struct xe_device *xe)
const struct heci_gsc_def *def;
int ret;
if (!HAS_HECI_GSCFI(xe))
if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe))
return;
heci_gsc->irq = -1;
if (xe->info.platform == XE_PVC) {
if (xe->info.platform == XE_BATTLEMAGE) {
def = &heci_gsc_def_dg2;
} else if (xe->info.platform == XE_PVC) {
def = &heci_gsc_def_pvc;
} else if (xe->info.platform == XE_DG2) {
def = &heci_gsc_def_dg2;
......@@ -232,3 +234,23 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
if (ret)
drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
}
void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
{
int ret;
if ((iir & CSC_IRQ_INTF(1)) == 0)
return;
if (!HAS_HECI_CSCFI(xe)) {
drm_warn_once(&xe->drm, "CSC irq: not supported");
return;
}
if (xe->heci_gsc.irq < 0)
return;
ret = generic_handle_irq(xe->heci_gsc.irq);
if (ret)
drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
}
......@@ -11,10 +11,15 @@ struct xe_device;
struct mei_aux_device;
/*
* The HECI1 bit corresponds to bit15 and HECI2 to bit14.
* GSC HECI1 bit corresponds to bit15 and HECI2 to bit14.
* The reason for this is to allow growth for more interfaces in the future.
*/
#define GSC_IRQ_INTF(_x) BIT(15 - (_x))
#define GSC_IRQ_INTF(_x) BIT(15 - (_x))
/*
* CSC HECI1 bit corresponds to bit9 and HECI2 to bit10.
*/
#define CSC_IRQ_INTF(_x) BIT(9 + (_x))
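A quick, purely illustrative sanity check on the bit layout described above, assuming the macro argument is the zero-based interface index (HECI1 = 0, HECI2 = 1), which matches xe_heci_csc_irq_handler() above testing CSC_IRQ_INTF(1):
/* Not part of the driver; compile-time restatement of the comments above. */
static_assert(GSC_IRQ_INTF(0) == BIT(15)); /* GSC HECI1 */
static_assert(GSC_IRQ_INTF(1) == BIT(14)); /* GSC HECI2 */
static_assert(CSC_IRQ_INTF(0) == BIT(9));  /* CSC HECI1 */
static_assert(CSC_IRQ_INTF(1) == BIT(10)); /* CSC HECI2 */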
/**
* struct xe_heci_gsc - graphics security controller for xe, HECI interface
......@@ -31,5 +36,6 @@ struct xe_heci_gsc {
void xe_heci_gsc_init(struct xe_device *xe);
void xe_heci_gsc_fini(struct xe_device *xe);
void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir);
void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir);
#endif /* __XE_HECI_GSC_DEV_H__ */
......@@ -459,6 +459,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
* the primary tile.
*/
if (id == 0) {
if (HAS_HECI_CSCFI(xe))
xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
}
......
......@@ -7,7 +7,7 @@
#include <drm/drm_managed.h>
#include "regs/xe_sriov_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
......@@ -71,7 +71,7 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
lmtt->ops->lmtt_pte_num(level)),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED);
XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_PINNED);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
......
......@@ -47,6 +47,24 @@ struct xe_migrate_pt_update_ops {
struct xe_tile *tile, struct iosys_map *map,
void *pos, u32 ofs, u32 num_qwords,
const struct xe_vm_pgtable_update *update);
/**
* @clear: Clear a command buffer or page-table with ptes.
* @pt_update: Embeddable callback argument.
* @tile: The tile for the current operation.
* @map: struct iosys_map into the memory to be populated.
* @pos: If @map is NULL, map into the memory to be populated.
* @ofs: qword offset into @map, unused if @map is NULL.
* @num_qwords: Number of qwords to write.
* @update: Information about the PTEs to be inserted.
*
* This interface is intended to be used as a callback into the
* page-table system to populate command buffers or shared
* page-tables with PTEs.
*/
void (*clear)(struct xe_migrate_pt_update *pt_update,
struct xe_tile *tile, struct iosys_map *map,
void *pos, u32 ofs, u32 num_qwords,
const struct xe_vm_pgtable_update *update);
/**
* @pre_commit: Callback to be called just before arming the
......@@ -67,14 +85,10 @@ struct xe_migrate_pt_update_ops {
struct xe_migrate_pt_update {
/** @ops: Pointer to the struct xe_migrate_pt_update_ops callbacks */
const struct xe_migrate_pt_update_ops *ops;
/** @vma: The vma we're updating the pagetable for. */
struct xe_vma *vma;
/** @vops: VMA operations */
struct xe_vma_ops *vops;
/** @job: The job if a GPU page-table update. NULL otherwise */
struct xe_sched_job *job;
/** @start: Start of update for the range fence */
u64 start;
/** @last: Last of update for the range fence */
u64 last;
/** @tile_id: Tile ID of the update */
u8 tile_id;
};
......@@ -96,15 +110,9 @@ struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
struct dma_fence *
xe_migrate_update_pgtables(struct xe_migrate *m,
struct xe_vm *vm,
struct xe_bo *bo,
struct xe_exec_queue *q,
const struct xe_vm_pgtable_update *updates,
u32 num_updates,
struct xe_sync_entry *syncs, u32 num_syncs,
struct xe_migrate_pt_update *pt_update);
void xe_migrate_wait(struct xe_migrate *m);
struct xe_exec_queue *xe_tile_migrate_engine(struct xe_tile *tile);
struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile);
#endif
......@@ -22,7 +22,6 @@ u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
int xe_mmio_probe_vram(struct xe_device *xe);
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
u32 *out_val, bool atomic);
......
......@@ -641,7 +641,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
u32 offset = xe_bo_ggtt_addr(lrc->bo);
do {
bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2;
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
bb->cs[bb->len++] = offset + flex->offset * sizeof(u32);
bb->cs[bb->len++] = 0;
bb->cs[bb->len++] = flex->value;
......
......@@ -7,6 +7,8 @@
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
......@@ -15,6 +17,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_wa.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
......@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
if (GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
/* Wa_16023588340. XXX: Should use XE_WA */
if (GRAPHICS_VERx100(xe) == 2001)
xe->pat.n_entries = 28; /* Disable CLOS3 */
else
xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
xe->pat.idx[XE_CACHE_NONE] = 3;
xe->pat.idx[XE_CACHE_WT] = 15;
xe->pat.idx[XE_CACHE_WB] = 2;
......
......@@ -59,6 +59,7 @@ struct xe_device_desc {
u8 has_display:1;
u8 has_heci_gscfi:1;
u8 has_heci_cscfi:1;
u8 has_llc:1;
u8 has_mmio_ext:1;
u8 has_sriov:1;
......@@ -345,6 +346,7 @@ static const struct xe_device_desc bmg_desc = {
PLATFORM(BATTLEMAGE),
.has_display = true,
.require_force_probe = true,
.has_heci_cscfi = 1,
};
#undef PLATFORM
......@@ -606,6 +608,7 @@ static int xe_info_init_early(struct xe_device *xe,
xe->info.is_dgfx = desc->is_dgfx;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
xe->info.has_llc = desc->has_llc;
xe->info.has_mmio_ext = desc->has_mmio_ext;
xe->info.has_sriov = desc->has_sriov;
......@@ -815,7 +818,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d",
drm_dbg(&xe->drm, "%s %s %04x:%04x dgfx:%d gfx:%s (%d.%02d) media:%s (%d.%02d) display:%s dma_m_s:%d tc:%d gscfi:%d cscfi:%d",
desc->platform_name,
subplatform_desc ? subplatform_desc->name : "",
xe->info.devid, xe->info.revid,
......@@ -828,7 +831,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
xe->info.media_verx100 % 100,
str_yes_no(xe->info.enable_display),
xe->info.dma_mask_size, xe->info.tile_count,
xe->info.has_heci_gscfi);
xe->info.has_heci_gscfi, xe->info.has_heci_cscfi);
drm_dbg(&xe->drm, "Stepping = (G:%s, M:%s, D:%s, B:%s)\n",
xe_step_name(xe->info.step.graphics),
......
......@@ -20,6 +20,7 @@
#include "xe_guc.h"
#include "xe_irq.h"
#include "xe_pcode.h"
#include "xe_trace.h"
#include "xe_wa.h"
/**
......@@ -87,6 +88,7 @@ int xe_pm_suspend(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Suspending device\n");
trace_xe_pm_suspend(xe, __builtin_return_address(0));
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
......@@ -131,6 +133,7 @@ int xe_pm_resume(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Resuming device\n");
trace_xe_pm_resume(xe, __builtin_return_address(0));
for_each_tile(tile, xe, id)
xe_wa_apply_tile_workarounds(tile);
......@@ -326,6 +329,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
u8 id;
int err = 0;
trace_xe_pm_runtime_suspend(xe, __builtin_return_address(0));
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
......@@ -399,6 +403,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
u8 id;
int err = 0;
trace_xe_pm_runtime_resume(xe, __builtin_return_address(0));
/* Disable access_ongoing asserts and prevent recursive pm calls */
xe_pm_write_callback_task(xe, current);
......@@ -463,6 +468,7 @@ static void pm_runtime_lockdep_prime(void)
*/
void xe_pm_runtime_get(struct xe_device *xe)
{
trace_xe_pm_runtime_get(xe, __builtin_return_address(0));
pm_runtime_get_noresume(xe->drm.dev);
if (xe_pm_read_callback_task(xe) == current)
......@@ -478,6 +484,7 @@ void xe_pm_runtime_get(struct xe_device *xe)
*/
void xe_pm_runtime_put(struct xe_device *xe)
{
trace_xe_pm_runtime_put(xe, __builtin_return_address(0));
if (xe_pm_read_callback_task(xe) == current) {
pm_runtime_put_noidle(xe->drm.dev);
} else {
......@@ -495,6 +502,7 @@ void xe_pm_runtime_put(struct xe_device *xe)
*/
int xe_pm_runtime_get_ioctl(struct xe_device *xe)
{
trace_xe_pm_runtime_get_ioctl(xe, __builtin_return_address(0));
if (WARN_ON(xe_pm_read_callback_task(xe) == current))
return -ELOOP;
......
......@@ -17,10 +17,16 @@ static void preempt_fence_work_func(struct work_struct *w)
container_of(w, typeof(*pfence), preempt_work);
struct xe_exec_queue *q = pfence->q;
if (pfence->error)
if (pfence->error) {
dma_fence_set_error(&pfence->base, pfence->error);
else
q->ops->suspend_wait(q);
} else if (!q->ops->reset_status(q)) {
int err = q->ops->suspend_wait(q);
if (err)
dma_fence_set_error(&pfence->base, err);
} else {
dma_fence_set_error(&pfence->base, -ENOENT);
}
dma_fence_signal(&pfence->base);
/*
......
......@@ -17,6 +17,7 @@ struct xe_sync_entry;
struct xe_tile;
struct xe_vm;
struct xe_vma;
struct xe_vma_ops;
/* Largest huge pte is currently 1GiB. May become device dependent. */
#define MAX_HUGEPTE_LEVEL 2
......@@ -34,14 +35,11 @@ void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred);
struct dma_fence *
__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
bool rebind);
struct dma_fence *
__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs);
int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops);
struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile,
struct xe_vma_ops *vops);
void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
......
......@@ -74,4 +74,52 @@ struct xe_vm_pgtable_update {
u32 flags;
};
/** struct xe_vm_pgtable_update_op - Page table update operation */
struct xe_vm_pgtable_update_op {
/** @entries: entries to update for this operation */
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
/** @vma: VMA for operation, operation not valid if NULL */
struct xe_vma *vma;
/** @num_entries: number of entries for this update operation */
u32 num_entries;
/** @bind: is a bind */
bool bind;
/** @rebind: is a rebind */
bool rebind;
};
/** struct xe_vm_pgtable_update_ops: page table update operations */
struct xe_vm_pgtable_update_ops {
/** @ops: operations */
struct xe_vm_pgtable_update_op *ops;
/** @deferred: deferred list to destroy PT entries */
struct llist_head deferred;
/** @q: exec queue for PT operations */
struct xe_exec_queue *q;
/** @start: start address of ops */
u64 start;
/** @last: last address of ops */
u64 last;
/** @num_ops: number of operations */
u32 num_ops;
/** @current_op: current operation index */
u32 current_op;
/** @needs_userptr_lock: Needs userptr lock */
bool needs_userptr_lock;
/** @needs_invalidation: Needs invalidation */
bool needs_invalidation;
/**
* @wait_vm_bookkeep: PT operations need to wait until VM is idle
* (bookkeep dma-resv slots are idle) and stage all future VM activity
* behind these operations (install PT operations into VM kernel
* dma-resv slot).
*/
bool wait_vm_bookkeep;
/**
* @wait_vm_kernel: PT operations need to wait until VM kernel dma-resv
* slots are idle.
*/
bool wait_vm_kernel;
};
#endif
......@@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
topo.type = DRM_XE_TOPO_EU_PER_DSS;
topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
DRM_XE_TOPO_SIMD16_EU_PER_DSS :
DRM_XE_TOPO_EU_PER_DSS;
err = copy_mask(&query_ptr, &topo,
gt->fuse_topo.eu_mask_per_dss,
sizeof(gt->fuse_topo.eu_mask_per_dss));
......
......@@ -217,21 +217,19 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
ctx->active_entries = active_entries;
ctx->n_entries = n_entries;
}
EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_ctx_enable_active_tracking);
static void rtp_mark_active(struct xe_device *xe,
struct xe_rtp_process_ctx *ctx,
unsigned int first, unsigned int last)
unsigned int idx)
{
if (!ctx->active_entries)
return;
if (drm_WARN_ON(&xe->drm, last > ctx->n_entries))
if (drm_WARN_ON(&xe->drm, idx >= ctx->n_entries))
return;
if (first == last)
bitmap_set(ctx->active_entries, first, 1);
else
bitmap_set(ctx->active_entries, first, last - first + 2);
bitmap_set(ctx->active_entries, idx, 1);
}
/**
......@@ -276,8 +274,7 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
}
if (match)
rtp_mark_active(xe, ctx, entry - entries,
entry - entries);
rtp_mark_active(xe, ctx, entry - entries);
}
}
EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
......@@ -288,44 +285,29 @@ EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
* @entries: Table with RTP definitions
*
* Walk the table pointed by @entries (with an empty sentinel), executing the
* rules. A few differences from xe_rtp_process_to_sr():
*
* 1. There is no action associated with each entry since this uses
* struct xe_rtp_entry. Its main use is for marking active workarounds via
* xe_rtp_process_ctx_enable_active_tracking().
* 2. There is support for OR operations by having entries with no name.
* rules. One difference from xe_rtp_process_to_sr(): there is no action
* associated with each entry since this uses struct xe_rtp_entry. Its main use
* is for marking active workarounds via
* xe_rtp_process_ctx_enable_active_tracking().
*/
void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
const struct xe_rtp_entry *entries)
{
const struct xe_rtp_entry *entry, *first_entry;
const struct xe_rtp_entry *entry;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
struct xe_device *xe;
rtp_get_context(ctx, &hwe, &gt, &xe);
first_entry = entries;
if (drm_WARN_ON(&xe->drm, !first_entry->name))
return;
for (entry = entries; entry && entry->rules; entry++) {
if (entry->name)
first_entry = entry;
if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
continue;
/* Fast-forward entry, eliminating the OR'ed entries */
for (entry++; entry && entry->rules; entry++)
if (entry->name)
break;
entry--;
rtp_mark_active(xe, ctx, first_entry - entries,
entry - entries);
rtp_mark_active(xe, ctx, entry - entries);
}
}
EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
bool xe_rtp_match_even_instance(const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
......
......@@ -374,7 +374,7 @@ struct xe_reg_sr;
* XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry
* @...: Rules
*
* At least one rule is needed and up to 6 are supported. Multiple rules are
* At least one rule is needed and up to 12 are supported. Multiple rules are
* AND'ed together, i.e. all the rules must evaluate to true for the entry to
* be processed. See XE_RTP_MATCH_* for the possible match rules. Example:
*
......@@ -399,7 +399,7 @@ struct xe_reg_sr;
* XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr
* @...: Actions to be taken
*
* At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_*
* At least one action is needed and up to 12 are supported. See XE_RTP_ACTION_*
* for the possible actions. Example:
*
* .. code-block:: c
......
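The kernel-doc example is collapsed in this view. As a rough sketch of the shape such an entry takes (the entry itself is hypothetical; the rule and action names are reused from entries elsewhere in this diff), a struct xe_rtp_entry_sr combining rules and actions looks like:
{ XE_RTP_NAME("hypothetical-entry"),
  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},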
......@@ -60,6 +60,12 @@
#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_7(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_6(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_8(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_7(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_9(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_8(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_10(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_9(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_11(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_10(prefix_, sep_, _XE_TUPLE_TAIL args_)
#define XE_RTP_PASTE_12(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_11(prefix_, sep_, _XE_TUPLE_TAIL args_)
/*
* XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
......
......@@ -84,6 +84,13 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager,
unsigned int size)
{
/*
* BB too large, return -ENOBUFS indicating the user should split
* the array of binds into smaller chunks.
*/
if (size > sa_manager->base.size)
return ERR_PTR(-ENOBUFS);
return drm_suballoc_new(&sa_manager->base, size, GFP_KERNEL, true, 0);
}
......
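From userspace, the practical consequence of the -ENOBUFS return above is a split-and-retry loop over the array of binds. A minimal sketch, where do_vm_bind() and struct bind_op are hypothetical stand-ins for a vm_bind ioctl wrapper and its per-bind descriptor:
/* Hypothetical: do_vm_bind()/struct bind_op are illustrative, not real UAPI. */
static int bind_array(int fd, struct bind_op *ops, unsigned int n)
{
	int err = do_vm_bind(fd, ops, n);

	if (err != -ENOBUFS || n <= 1)
		return err;

	/* Too many binds for one suballocated BB: split in half and retry. */
	err = bind_array(fd, ops, n / 2);
	if (err)
		return err;

	return bind_array(fd, ops + n / 2, n - n / 2);
}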
......@@ -5,7 +5,7 @@
#include <drm/drm_managed.h>
#include "regs/xe_sriov_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_device.h"
......
......@@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
u64 value)
{
struct xe_user_fence *ufence;
u64 __user *ptr = u64_to_user_ptr(addr);
if (!access_ok(ptr, sizeof(ptr)))
return ERR_PTR(-EFAULT);
ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
if (!ufence)
return NULL;
return ERR_PTR(-ENOMEM);
ufence->xe = xe;
kref_init(&ufence->refcount);
ufence->addr = u64_to_user_ptr(addr);
ufence->addr = ptr;
ufence->value = value;
ufence->mm = current->mm;
mmgrab(ufence->mm);
......@@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
} else {
sync->ufence = user_fence_create(xe, sync_in.addr,
sync_in.timeline_value);
if (XE_IOCTL_DBG(xe, !sync->ufence))
return -ENOMEM;
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
return PTR_ERR(sync->ufence);
}
break;
......@@ -200,14 +204,6 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
return 0;
}
int xe_sync_entry_wait(struct xe_sync_entry *sync)
{
if (sync->fence)
dma_fence_wait(sync->fence, true);
return 0;
}
int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
{
int err;
......
......@@ -22,7 +22,6 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
unsigned int flags);
int xe_sync_entry_wait(struct xe_sync_entry *sync);
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
void xe_sync_entry_signal(struct xe_sync_entry *sync,
......
......@@ -369,6 +369,58 @@ TRACE_EVENT(xe_reg_rw,
(u32)(__entry->val >> 32))
);
DECLARE_EVENT_CLASS(xe_pm_runtime,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller),
TP_STRUCT__entry(
__string(dev, __dev_name_xe(xe))
__field(void *, caller)
),
TP_fast_assign(
__assign_str(dev);
__entry->caller = caller;
),
TP_printk("dev=%s caller_function=%pS", __get_str(dev), __entry->caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_put,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_resume,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_suspend,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_resume,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_suspend,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl,
TP_PROTO(struct xe_device *xe, void *caller),
TP_ARGS(xe, caller)
);
#endif
/* This part must be outside protection */
......
......@@ -117,11 +117,6 @@ DEFINE_EVENT(xe_vma, xe_vma_acc,
TP_ARGS(vma)
);
DEFINE_EVENT(xe_vma, xe_vma_fail,
TP_PROTO(struct xe_vma *vma),
TP_ARGS(vma)
);
DEFINE_EVENT(xe_vma, xe_vma_bind,
TP_PROTO(struct xe_vma *vma),
TP_ARGS(vma)
......@@ -237,6 +232,11 @@ DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit,
TP_ARGS(vm)
);
DEFINE_EVENT(xe_vm, xe_vm_ops_fail,
TP_PROTO(struct xe_vm *vm),
TP_ARGS(vm)
);
#endif
/* This part must be outside protection */
......
......@@ -93,6 +93,14 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
/* Xe2_HPG */
{ XE_RTP_NAME("Tuning: vs hit max value"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
},
{}
};
......
......@@ -116,6 +116,8 @@ struct fw_blobs_by_type {
fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
fw_def(BATTLEMAGE, no_ver(xe, huc, bmg)) \
fw_def(LUNARLAKE, no_ver(xe, huc, lnl)) \
fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \
fw_def(DG1, no_ver(i915, huc, dg1)) \
fw_def(ALDERLAKE_P, no_ver(i915, huc, tgl)) \
......@@ -125,6 +127,7 @@ struct fw_blobs_by_type {
/* for the GSC FW we match the compatibility version and not the release one */
#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \
fw_def(LUNARLAKE, major_ver(xe, gsc, lnl, 1, 0, 0)) \
fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0, 0))
#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \
......
......@@ -259,6 +259,8 @@ static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
return drm_gpuvm_resv(&vm->gpuvm);
}
void xe_vm_kill(struct xe_vm *vm, bool unlocked);
/**
* xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
* @vm: The vm
......
......@@ -21,18 +21,27 @@ struct xe_bo;
struct xe_sync_entry;
struct xe_user_fence;
struct xe_vm;
struct xe_vm_pgtable_update_op;
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define TEST_VM_OPS_ERROR
#define FORCE_OP_ERROR BIT(31)
#define FORCE_OP_ERROR_LOCK 0
#define FORCE_OP_ERROR_PREPARE 1
#define FORCE_OP_ERROR_RUN 2
#define FORCE_OP_ERROR_COUNT 3
#endif
#define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
#define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1)
#define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2)
#define XE_VMA_FIRST_REBIND (DRM_GPUVA_USERBITS << 3)
#define XE_VMA_LAST_REBIND (DRM_GPUVA_USERBITS << 4)
#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5)
#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6)
#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7)
#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8)
#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9)
#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 10)
#define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 3)
#define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 4)
#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 5)
#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 6)
#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7)
#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8)
/** struct xe_userptr - User pointer */
struct xe_userptr {
......@@ -99,6 +108,9 @@ struct xe_vma {
*/
u8 tile_present;
/** @tile_staged: bind is staged for this VMA */
u8 tile_staged;
/**
* @pat_index: The pat index to use when encoding the PTEs for this vma.
*/
......@@ -314,31 +326,18 @@ struct xe_vma_op_prefetch {
/** enum xe_vma_op_flags - flags for VMA operation */
enum xe_vma_op_flags {
/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
XE_VMA_OP_FIRST = BIT(0),
/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
XE_VMA_OP_LAST = BIT(1),
/** @XE_VMA_OP_COMMITTED: VMA operation committed */
XE_VMA_OP_COMMITTED = BIT(2),
XE_VMA_OP_COMMITTED = BIT(0),
/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation committed */
XE_VMA_OP_PREV_COMMITTED = BIT(3),
XE_VMA_OP_PREV_COMMITTED = BIT(1),
/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed */
XE_VMA_OP_NEXT_COMMITTED = BIT(4),
XE_VMA_OP_NEXT_COMMITTED = BIT(2),
};
/** struct xe_vma_op - VMA operation */
struct xe_vma_op {
/** @base: GPUVA base operation */
struct drm_gpuva_op base;
/** @q: exec queue for this operation */
struct xe_exec_queue *q;
/**
* @syncs: syncs for this operation, only used on first and last
* operation
*/
struct xe_sync_entry *syncs;
/** @num_syncs: number of syncs */
u32 num_syncs;
/** @link: async operation link */
struct list_head link;
/** @flags: operation flags */
......@@ -362,12 +361,18 @@ struct xe_vma_ops {
struct list_head list;
/** @vm: VM */
struct xe_vm *vm;
/** @q: exec queue these operations */
/** @q: exec queue for VMA operations */
struct xe_exec_queue *q;
/** @syncs: syncs for these operations */
struct xe_sync_entry *syncs;
/** @num_syncs: number of syncs */
u32 num_syncs;
/** @pt_update_ops: page table update operations */
struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE];
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
bool inject_error;
#endif
};
#endif
......@@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
},
{ XE_RTP_NAME("14021402888"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
/* Xe2_HPG */
......@@ -539,6 +543,16 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
/* Xe2_LPM */
{ XE_RTP_NAME("16021639441"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
GHWSP_CSB_REPORT_DIS |
PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
/* Xe2_HPM */
{ XE_RTP_NAME("16021639441"),
......@@ -741,6 +755,7 @@ void xe_wa_process_oob(struct xe_gt *gt)
xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.oob,
ARRAY_SIZE(oob_was));
gt->wa_active.oob_initialized = true;
xe_rtp_process(&ctx, oob_was);
}
......
......@@ -6,6 +6,8 @@
#ifndef _XE_WA_
#define _XE_WA_
#include "xe_assert.h"
struct drm_printer;
struct xe_gt;
struct xe_hw_engine;
......@@ -25,6 +27,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
* @gt__: gt instance
* @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
*/
#define XE_WA(gt__, id__) test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob)
#define XE_WA(gt__, id__) ({ \
xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \
test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
})
#endif
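With the assert added above, XE_WA() may only be evaluated once xe_wa_process_oob() has run for the GT (see the oob_initialized flag set earlier in this diff). A hedged usage sketch, with the guarded work left as a placeholder:
/* Sketch only: "16023588340" is one of the OOB ids from the rules file below. */
if (XE_WA(gt, 16023588340)) {
	/* workaround-specific handling goes here */
}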
......@@ -29,3 +29,5 @@
13011645652 GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
22019338487_display PLATFORM(LUNARLAKE)
16023588340 GRAPHICS_VERSION(2001)
......@@ -517,7 +517,14 @@ struct drm_xe_query_gt_list {
* available per Dual Sub Slices (DSS). For example a query response
* containing the following in mask:
* ``EU_PER_DSS ff ff 00 00 00 00 00 00``
* means each DSS has 16 EU.
means each DSS has 16 SIMD8 EUs. This type may be omitted if the device
* doesn't have SIMD8 EUs.
* - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution
* Units (EU) available per Dual Sub Slices (DSS). For example a query
* response containing the following in mask:
* ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00``
means each DSS has 16 SIMD16 EUs. This type may be omitted if the device
* doesn't have SIMD16 EUs.
*/
struct drm_xe_query_topology_mask {
/** @gt_id: GT ID the mask is associated with */
......@@ -527,6 +534,7 @@ struct drm_xe_query_topology_mask {
#define DRM_XE_TOPO_DSS_COMPUTE 2
#define DRM_XE_TOPO_L3_BANK 3
#define DRM_XE_TOPO_EU_PER_DSS 4
#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5
/** @type: type of mask */
__u16 type;
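To show how userspace consumes these masks, here is a sketch of a helper that counts EUs per DSS for either EU mask type; it assumes the item has already been copied out of the topology query response and relies on the num_bytes/mask fields of the full struct definition in xe_drm.h:
/* Sketch: popcount of the per-DSS EU mask gives the EU count per DSS. */
static unsigned int eus_per_dss(const struct drm_xe_query_topology_mask *topo)
{
	unsigned int i, count = 0;

	if (topo->type != DRM_XE_TOPO_EU_PER_DSS &&
	    topo->type != DRM_XE_TOPO_SIMD16_EU_PER_DSS)
		return 0;

	for (i = 0; i < topo->num_bytes; i++)
		count += __builtin_popcount(topo->mask[i]);

	return count;
}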
......@@ -1590,10 +1598,10 @@ enum drm_xe_oa_property_id {
* b. Counter select c. Counter size and d. BC report. Also refer to the
* oa_formats array in drivers/gpu/drm/xe/xe_oa.c.
*/
#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16)
#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24)
#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16)
#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24)
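As a small illustration of the layout, decoding a packed format value is just masking and shifting with the definitions above (the variable fmt stands for a format value obtained elsewhere; the decode itself is not part of the UAPI header):
/* Illustrative decode of a packed OA format value. */
__u64 fmt_type     = (fmt & DRM_XE_OA_FORMAT_MASK_FMT_TYPE) >> 0;
__u64 counter_sel  = (fmt & DRM_XE_OA_FORMAT_MASK_COUNTER_SEL) >> 8;
__u64 counter_size = (fmt & DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE) >> 16;
__u64 bc_report    = (fmt & DRM_XE_OA_FORMAT_MASK_BC_REPORT) >> 24;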
/**
* @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit
......