Commit 3f53d7e4 authored by Daniel Vetter's avatar Daniel Vetter

Merge tag 'drm-intel-gt-next-2024-08-23' of...

Merge tag 'drm-intel-gt-next-2024-08-23' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-next

UAPI Changes:

- Limit the number of relocations to INT_MAX (Tvrtko)

  Only impact should be synthetic tests.

Driver Changes:

- Fix for #11396: GPU Hang and rcs0 reset on Cherrytrail platform
- Fix Virtual Memory mapping boundaries calculation (Andi)
- Fix for #11255: Long hangs in buddy allocator with DG2/A380 without
  Resizable BAR since 6.9 (David)
- Mark the GT as dead when mmio is unreliable (Chris, Andi)
- Workaround additions / fixes for MTL, ARL and DG2 (John H, Nitin)
- Enable partial memory mapping of GPU virtual memory (Andi, Chris)

- Prevent NULL deref on intel_memory_regions_hw_probe (Jonathan, Dan)
- Avoid UAF on intel_engines_release (Krzysztof)

- Don't update PWR_CLK_STATE starting Gen12 (Umesh)
- Code and dmesg cleanups (Andi, Jesus, Luca)
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZshcfSqgfnl8Mh4P@jlahtine-mobl.ger.corp.intel.com
parents f9ae00b1 255fc170
......@@ -1533,7 +1533,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
u64_to_user_ptr(entry->relocs_ptr);
unsigned long remain = entry->relocation_count;
if (unlikely(remain > N_RELOC(ULONG_MAX)))
if (unlikely(remain > N_RELOC(INT_MAX)))
return -EINVAL;
/*
......@@ -1641,7 +1641,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
if (size == 0)
return 0;
if (size > N_RELOC(ULONG_MAX))
if (size > N_RELOC(INT_MAX))
return -EINVAL;
addr = u64_to_user_ptr(entry->relocs_ptr);
......
......@@ -252,6 +252,7 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
struct vm_area_struct *area = vmf->vma;
struct i915_mmap_offset *mmo = area->vm_private_data;
struct drm_i915_gem_object *obj = mmo->obj;
unsigned long obj_offset;
resource_size_t iomap;
int err;
......@@ -273,10 +274,11 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
iomap -= obj->mm.region->region.start;
}
obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
/* PTEs are revoked in obj->ops->put_pages() */
err = remap_io_sg(area,
area->vm_start, area->vm_end - area->vm_start,
obj->mm.pages->sgl, iomap);
obj->mm.pages->sgl, obj_offset, iomap);
if (area->vm_flags & VM_WRITE) {
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
......@@ -290,6 +292,47 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
return i915_error_to_vmf_fault(err);
}
static void set_address_limits(struct vm_area_struct *area,
struct i915_vma *vma,
unsigned long obj_offset,
resource_size_t gmadr_start,
unsigned long *start_vaddr,
unsigned long *end_vaddr,
unsigned long *pfn)
{
unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */
long start, end; /* memory boundaries */
/*
* Let's move into the ">> PAGE_SHIFT"
* domain to be sure not to lose bits
*/
vm_start = area->vm_start >> PAGE_SHIFT;
vm_end = area->vm_end >> PAGE_SHIFT;
vma_size = vma->size >> PAGE_SHIFT;
/*
* Calculate the memory boundaries by considering the offset
* provided by the user during memory mapping and the offset
* provided for the partial mapping.
*/
start = vm_start;
start -= obj_offset;
start += vma->gtt_view.partial.offset;
end = start + vma_size;
start = max_t(long, start, vm_start);
end = min_t(long, end, vm_end);
/* Let's move back into the "<< PAGE_SHIFT" domain */
*start_vaddr = (unsigned long)start << PAGE_SHIFT;
*end_vaddr = (unsigned long)end << PAGE_SHIFT;
*pfn = (gmadr_start + i915_ggtt_offset(vma)) >> PAGE_SHIFT;
*pfn += (*start_vaddr - area->vm_start) >> PAGE_SHIFT;
*pfn += obj_offset - vma->gtt_view.partial.offset;
}
static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
......@@ -302,14 +345,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
unsigned long obj_offset;
unsigned long start, end; /* memory boundaries */
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
unsigned long pfn;
int srcu;
int ret;
/* We don't use vmf->pgoff since that has the fake offset */
obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
page_offset += obj_offset;
trace_i915_gem_object_fault(obj, page_offset, true, write);
......@@ -402,12 +449,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (ret)
goto err_unpin;
/*
* Dump all the necessary parameters in this function to perform the
* arithmetic calculation for the virtual address start and end and
* the PFN (Page Frame Number).
*/
set_address_limits(area, vma, obj_offset, ggtt->gmadr.start,
&start, &end, &pfn);
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
(ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
min_t(u64, vma->size, area->vm_end - area->vm_start),
&ggtt->iomap);
ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap);
if (ret)
goto err_fence;
......@@ -1030,9 +1081,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
rcu_read_lock();
drm_vma_offset_lock_lookup(dev->vma_offset_manager);
node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
node = drm_vma_offset_lookup_locked(dev->vma_offset_manager,
vma->vm_pgoff,
vma_pages(vma));
if (node && drm_vma_node_is_allowed(node, priv)) {
/*
* Skip 0-refcnted objects as it is in the process of being
......@@ -1084,6 +1135,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma
mmo = mmap_offset_attach(obj, mmap_type, NULL);
if (IS_ERR(mmo))
return PTR_ERR(mmo);
vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node);
}
/*
......
......@@ -535,7 +535,7 @@ struct drm_i915_gem_object {
* I915_CACHE_NONE. The only exception is userptr objects, where we
* instead force I915_CACHE_LLC, but we also don't allow userspace to
* ever change the @cache_level for such objects. Another special case
* is dma-buf, which doesn't rely on @cache_dirty, but there we
* is dma-buf, which doesn't rely on @cache_dirty, but there we
* always do a forced flush when acquiring the pages, if there is a
* chance that the pages can be read directly from main memory with
* the GPU.
......
......@@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
obj->mm.region, &places[0], obj->bo_offset,
obj->base.size, flags);
places[0].flags |= TTM_PL_FLAG_DESIRED;
/* Cache this on object? */
for (i = 0; i < num_allowed; ++i) {
......@@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
.interruptible = true,
.no_wait_gpu = false,
};
int real_num_busy;
struct ttm_placement initial_placement;
struct ttm_place initial_place;
int ret;
/* First try only the requested placement. No eviction. */
real_num_busy = placement->num_placement;
placement->num_placement = 1;
ret = ttm_bo_validate(bo, placement, &ctx);
initial_placement.num_placement = 1;
memcpy(&initial_place, placement->placement, sizeof(struct ttm_place));
initial_place.flags |= TTM_PL_FLAG_DESIRED;
initial_placement.placement = &initial_place;
ret = ttm_bo_validate(bo, &initial_placement, &ctx);
if (ret) {
ret = i915_ttm_err_to_gem(ret);
/*
......@@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
* If the initial attempt fails, allow all accepted placements,
* evicting if necessary.
*/
placement->num_placement = real_num_busy;
ret = ttm_bo_validate(bo, placement, &ctx);
if (ret)
return i915_ttm_err_to_gem(ret);
......
......@@ -693,6 +693,8 @@ void intel_engines_release(struct intel_gt *gt)
memset(&engine->reset, 0, sizeof(engine->reset));
}
llist_del_all(&gt->i915->uabi_engines_llist);
}
void intel_engine_free_request_pool(struct intel_engine_cs *engine)
......
......@@ -220,6 +220,7 @@
#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
#define CMD_3DSTATE_MESH_CONTROL ((0x3 << 29) | (0x3 << 27) | (0x0 << 24) | (0x77 << 16) | (0x3))
#define XY_CTRL_SURF_INSTR_SIZE 5
#define MI_FLUSH_DW_SIZE 3
......
......@@ -174,7 +174,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
int intel_gt_probe_all(struct drm_i915_private *i915);
int intel_gt_tiles_init(struct drm_i915_private *i915);
void intel_gt_release_all(struct drm_i915_private *i915);
#define for_each_gt(gt__, i915__, id__) \
for ((id__) = 0; \
......@@ -208,4 +207,10 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
void intel_gt_bind_context_set_ready(struct intel_gt *gt);
void intel_gt_bind_context_set_unready(struct intel_gt *gt);
bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
static inline void intel_gt_set_wedged_async(struct intel_gt *gt)
{
queue_work(system_highpri_wq, &gt->wedge);
}
#endif /* __INTEL_GT_H__ */
......@@ -292,6 +292,8 @@ struct intel_gt {
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
struct work_struct wedge;
struct i915_perf_gt perf;
/** link: &ggtt.gt_list */
......
......@@ -1013,6 +1013,15 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
GT_TRACE(gt, "end\n");
}
static void set_wedged_work(struct work_struct *w)
{
struct intel_gt *gt = container_of(w, struct intel_gt, wedge);
intel_wakeref_t wf;
with_intel_runtime_pm(gt->uncore->rpm, wf)
__intel_gt_set_wedged(gt);
}
void intel_gt_set_wedged(struct intel_gt *gt)
{
intel_wakeref_t wakeref;
......@@ -1614,6 +1623,7 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
INIT_WORK(&gt->wedge, set_wedged_work);
/*
* While undesirable to wait inside the shrinker, complain anyway.
......@@ -1640,7 +1650,7 @@ static void intel_wedge_me(struct work_struct *work)
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
intel_gt_set_wedged(w->gt);
set_wedged_work(&w->gt->wedge);
}
void __intel_init_wedge(struct intel_wedge_me *w,
......
......@@ -974,7 +974,12 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
if (ret)
return ret;
cs = intel_ring_begin(rq, (wal->count * 2 + 2));
if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS)
cs = intel_ring_begin(rq, (wal->count * 2 + 6));
else
cs = intel_ring_begin(rq, (wal->count * 2 + 2));
if (IS_ERR(cs))
return PTR_ERR(cs);
......@@ -1004,6 +1009,15 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
*cs++ = MI_NOOP;
/* Wa_14019789679 */
if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) {
*cs++ = CMD_3DSTATE_MESH_CONTROL;
*cs++ = 0;
*cs++ = 0;
*cs++ = MI_NOOP;
}
intel_uncore_forcewake_put__locked(uncore, fw);
spin_unlock(&uncore->lock);
intel_gt_mcr_unlock(wal->gt, flags);
......
......@@ -336,7 +336,7 @@ static int clear(struct intel_migrate *migrate,
if (vaddr[x] != val) {
pr_err("%ps failed, (%u != %u), offset: %zu\n",
fn, vaddr[x], val, x * sizeof(u32));
fn, vaddr[x], val, x * sizeof(u32));
igt_hexdump(vaddr + i * 1024, 4096);
err = -EINVAL;
}
......
......@@ -107,6 +107,7 @@ enum {
enum {
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002,
GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE = 0x9006,
};
#endif /* _ABI_GUC_KLVS_ABI_H */
......@@ -296,7 +296,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
/* Wa_16019325821 */
/* Wa_14019159160 */
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
flags |= GUC_WA_RCS_CCS_SWITCHOUT;
/*
......
......@@ -815,8 +815,7 @@ guc_capture_prep_lists(struct intel_guc *guc)
return PAGE_ALIGN(total_size);
}
static void guc_waklv_enable_simple(struct intel_guc *guc,
u32 klv_id, u32 *offset, u32 *remain)
static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id)
{
u32 size;
u32 klv_entry[] = {
......@@ -850,19 +849,20 @@ static void guc_waklv_init(struct intel_guc *guc)
remain = guc_ads_waklv_size(guc);
/* Wa_14019159160 */
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
guc_waklv_enable_simple(guc,
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE,
&offset, &remain);
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
guc_waklv_enable_simple(guc, &offset, &remain,
GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
guc_waklv_enable_simple(guc, &offset, &remain,
GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
}
/* Wa_16021333562 */
if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
(IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
IS_DG2(gt->i915)))
guc_waklv_enable_simple(guc,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
&offset, &remain);
guc_waklv_enable_simple(guc, &offset, &remain,
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
size = guc_ads_waklv_size(guc) - remain;
if (!size)
......
......@@ -2014,11 +2014,12 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
/*
* Technically possible for either of these values to be non-zero here,
* but very unlikely + harmless. Regardless let's add a warn so we can
* but very unlikely + harmless. Regardless let's add an error so we can
* see in CI if this happens frequently / a precursor to taking down the
* machine.
*/
GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
if (atomic_read(&guc->outstanding_submission_g2h))
guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n");
atomic_set(&guc->outstanding_submission_g2h, 0);
intel_guc_global_policies_update(guc);
......
......@@ -99,7 +99,7 @@ static void __confirm_options(struct intel_uc *uc)
}
if (!intel_uc_supports_guc(uc))
gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
i915->params.enable_guc, "GuC is not supported!");
if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
......
......@@ -122,13 +122,15 @@ int remap_io_mapping(struct vm_area_struct *vma,
* @addr: target user address to start at
* @size: size of map area
* @sgl: Start sg entry
* @offset: offset from the start of the page
* @iobase: Use stored dma address offset by this address or pfn if -1
*
* Note: this is only safe if the mm semaphore is held when called.
*/
int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
struct scatterlist *sgl, resource_size_t iobase)
struct scatterlist *sgl, unsigned long offset,
resource_size_t iobase)
{
struct remap_pfn r = {
.mm = vma->vm_mm,
......@@ -141,6 +143,14 @@ int remap_io_sg(struct vm_area_struct *vma,
/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) {
offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT;
r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase));
if (!r.sgt.sgp)
return -EINVAL;
}
r.sgt.curr = offset << PAGE_SHIFT;
if (!use_dma(iobase))
flush_cache_range(vma, addr, size);
......
......@@ -30,6 +30,7 @@ int remap_io_mapping(struct vm_area_struct *vma,
int remap_io_sg(struct vm_area_struct *vma,
unsigned long addr, unsigned long size,
struct scatterlist *sgl, resource_size_t iobase);
struct scatterlist *sgl, unsigned long offset,
resource_size_t iobase);
#endif /* __I915_MM_H__ */
......@@ -24,6 +24,7 @@
#include <drm/drm_managed.h>
#include <linux/pm_runtime.h>
#include "gt/intel_gt.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt_regs.h"
......@@ -180,14 +181,16 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
if (!wait_ack_clear(d, FORCEWAKE_KERNEL))
return;
if (fw_ack(d) == ~0)
if (fw_ack(d) == ~0) {
drm_err(&d->uncore->i915->drm,
"%s: MMIO unreliable (forcewake register returns 0xFFFFFFFF)!\n",
intel_uncore_forcewake_domain_to_str(d->id));
else
intel_gt_set_wedged_async(d->uncore->gt);
} else {
drm_err(&d->uncore->i915->drm,
"%s: timed out waiting for forcewake ack to clear.\n",
intel_uncore_forcewake_domain_to_str(d->id));
}
add_taint_for_CI(d->uncore->i915, TAINT_WARN); /* CI now unreliable */
}
......
......@@ -517,7 +517,7 @@ static int igt_mock_max_segment(void *arg)
if (!IS_ALIGNED(daddr, ps)) {
pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
__func__, &daddr, ps);
__func__, &daddr, ps);
err = -EINVAL;
goto out_close;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment