Commit aa5fe428 authored by Dave Airlie

Merge tag 'drm-xe-fixes-2024-02-29' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

UAPI Changes:
- A couple of tracepoint updates from Priyanka and Lucas.
- Make sure BINDs are completed before accepting UNBINDs on LR vms.
- Don't arbitrarily restrict max number of batched binds.
- Add uapi for dumpable bos (agreed on IRC).
- Remove unused uapi flags and a leftover comment.

Driver Changes:
- A couple of fixes related to the execlist backend.
Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZeCBg4MA2hd1oggN@fedora
parents 45046af3 8188cae3
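
As a usage illustration for the new dumpable-BO uapi mentioned above, the sketch below shows how a userspace driver might set DRM_XE_VM_BIND_FLAG_DUMPABLE on a VM_BIND. It is not part of this merge: the vm_id, BO handle and GPU address are placeholders, a real caller must also pick a valid pat_index for its platform, and the exact struct drm_xe_vm_bind layout should be checked against the xe_drm.h shipped with the kernel.

```c
/* Illustrative sketch only (not from this merge): map a BO and mark the
 * mapping dumpable so it can be included in devcoredump captures.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int bind_dumpable(int fd, __u32 vm_id, __u32 bo_handle,
			 __u64 gpu_addr, __u64 size)
{
	struct drm_xe_vm_bind bind;

	memset(&bind, 0, sizeof(bind));
	bind.vm_id = vm_id;
	bind.num_binds = 1;		/* single op, no vector_of_binds */
	bind.bind.obj = bo_handle;
	bind.bind.obj_offset = 0;
	bind.bind.range = size;
	bind.bind.addr = gpu_addr;
	bind.bind.op = DRM_XE_VM_BIND_OP_MAP;
	/* a real caller must also set a valid bind.bind.pat_index */
	bind.bind.flags = DRM_XE_VM_BIND_FLAG_DUMPABLE;	/* new in this series */

	return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
}
```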
@@ -28,6 +28,14 @@
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
+const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
+	[XE_PL_SYSTEM] = "system",
+	[XE_PL_TT] = "gtt",
+	[XE_PL_VRAM0] = "vram0",
+	[XE_PL_VRAM1] = "vram1",
+	[XE_PL_STOLEN] = "stolen"
+};
 static const struct ttm_place sys_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	migrate = xe->tiles[0].migrate;
 	xe_assert(xe, migrate);
-	trace_xe_bo_move(bo);
+	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
 	xe_device_mem_access_get(xe);
 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
......
@@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo);
 int xe_bo_restore_pinned(struct xe_bo *bo);
 extern struct ttm_device_funcs xe_ttm_funcs;
+extern const char *const xe_mem_type_to_name[];
 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
......
@@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo,
 static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 {
-	static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = {
-		[XE_PL_SYSTEM] = "system",
-		[XE_PL_TT] = "gtt",
-		[XE_PL_VRAM0] = "vram0",
-		[XE_PL_VRAM1] = "vram1",
-		[4 ... 6] = NULL,
-		[XE_PL_STOLEN] = "stolen"
-	};
 	struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {};
 	struct xe_file *xef = file->driver_priv;
 	struct ttm_device *bdev = &xef->xe->ttm;
@@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 	spin_unlock(&client->bos_lock);
 	for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) {
-		if (!mem_type_to_name[mem_type])
+		if (!xe_mem_type_to_name[mem_type])
 			continue;
 		man = ttm_manager_type(bdev, mem_type);
@@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 				   DRM_GEM_OBJECT_RESIDENT |
 				   (mem_type != XE_PL_SYSTEM ? 0 :
 				   DRM_GEM_OBJECT_PURGEABLE),
-				   mem_type_to_name[mem_type]);
+				   xe_mem_type_to_name[mem_type]);
 		}
 	}
 }
......
@@ -309,85 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
 	return q->ops->set_timeslice(q, value);
 }
-static int exec_queue_set_preemption_timeout(struct xe_device *xe,
-					     struct xe_exec_queue *q, u64 value,
-					     bool create)
-{
-	u32 min = 0, max = 0;
-	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
-				      XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
-	if (xe_exec_queue_enforce_schedule_limit() &&
-	    !xe_hw_engine_timeout_in_range(value, min, max))
-		return -EINVAL;
-	return q->ops->set_preempt_timeout(q, value);
-}
-static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
-				      u64 value, bool create)
-{
-	u32 min = 0, max = 0;
-	if (XE_IOCTL_DBG(xe, !create))
-		return -EINVAL;
-	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
-				      XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
-	if (xe_exec_queue_enforce_schedule_limit() &&
-	    !xe_hw_engine_timeout_in_range(value, min, max))
-		return -EINVAL;
-	return q->ops->set_job_timeout(q, value);
-}
-static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
-				      u64 value, bool create)
-{
-	if (XE_IOCTL_DBG(xe, !create))
-		return -EINVAL;
-	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-		return -EINVAL;
-	q->usm.acc_trigger = value;
-	return 0;
-}
-static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
-				     u64 value, bool create)
-{
-	if (XE_IOCTL_DBG(xe, !create))
-		return -EINVAL;
-	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-		return -EINVAL;
-	q->usm.acc_notify = value;
-	return 0;
-}
-static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
-					  u64 value, bool create)
-{
-	if (XE_IOCTL_DBG(xe, !create))
-		return -EINVAL;
-	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
-		return -EINVAL;
-	if (value > DRM_XE_ACC_GRANULARITY_64M)
-		return -EINVAL;
-	q->usm.acc_granularity = value;
-	return 0;
-}
 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
 					     struct xe_exec_queue *q,
 					     u64 value, bool create);
@@ -395,11 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
-	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
-	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
-	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
-	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
-	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
 };
 static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -418,7 +334,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
 	if (XE_IOCTL_DBG(xe, ext.property >=
 			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
-	    XE_IOCTL_DBG(xe, ext.pad))
+	    XE_IOCTL_DBG(xe, ext.pad) ||
+	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
+			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
 		return -EINVAL;
 	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
......
@@ -150,16 +150,6 @@ struct xe_exec_queue {
 		spinlock_t lock;
 	} compute;
-	/** @usm: unified shared memory state */
-	struct {
-		/** @acc_trigger: access counter trigger */
-		u32 acc_trigger;
-		/** @acc_notify: access counter notify */
-		u32 acc_notify;
-		/** @acc_granularity: access counter granularity */
-		u32 acc_granularity;
-	} usm;
 	/** @ops: submission backend exec queue operations */
 	const struct xe_exec_queue_ops *ops;
......
@@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
 {
 	struct xe_execlist_port *port = exl->port;
-	enum xe_exec_queue_priority priority = exl->active_priority;
+	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
 	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
 	XE_WARN_ON(priority < 0);
......
@@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 	xe_gt_assert(gt, vma);
+	/* Execlists not supported */
+	if (gt_to_xe(gt)->info.force_execlist) {
+		if (fence)
+			__invalidation_fence_signal(fence);
+		return 0;
+	}
 	action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
 	action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
 	if (!xe->info.has_range_tlb_invalidation) {
@@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 	struct drm_printer p = drm_err_printer(__func__);
 	int ret;
+	/* Execlists not supported */
+	if (gt_to_xe(gt)->info.force_execlist)
+		return 0;
 	/*
 	 * XXX: See above, this algorithm only works if seqno are always in
 	 * order
......
@@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
 #define PVC_CTX_ASID (0x2e + 1)
 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-#define ACC_GRANULARITY_S 20
-#define ACC_NOTIFY_S 16
 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
@@ -754,13 +752,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
 			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
 	if (xe->info.has_asid && vm)
-		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
-				     (q->usm.acc_granularity <<
-				      ACC_GRANULARITY_S) | vm->usm.asid);
-	if (xe->info.has_usm && vm)
-		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
-				     (q->usm.acc_notify << ACC_NOTIFY_S) |
-				     q->usm.acc_trigger);
+		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
 	lrc->desc = LRC_VALID;
 	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
......
@@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe)
 	pci_bus_for_each_resource(root, root_res, i) {
 		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
-		    root_res->start > 0x100000000ull)
+		    (u64)root_res->start > 0x100000000ul)
 			break;
 	}
......
@@ -19,7 +19,7 @@
 #include "xe_macros.h"
 #include "xe_sched_job_types.h"
-struct user_fence {
+struct xe_user_fence {
 	struct xe_device *xe;
 	struct kref refcount;
 	struct dma_fence_cb cb;
@@ -27,31 +27,32 @@ struct user_fence {
 	struct mm_struct *mm;
 	u64 __user *addr;
 	u64 value;
+	int signalled;
 };
 static void user_fence_destroy(struct kref *kref)
 {
-	struct user_fence *ufence = container_of(kref, struct user_fence,
-						 refcount);
+	struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence,
						    refcount);
 	mmdrop(ufence->mm);
 	kfree(ufence);
 }
-static void user_fence_get(struct user_fence *ufence)
+static void user_fence_get(struct xe_user_fence *ufence)
 {
 	kref_get(&ufence->refcount);
 }
-static void user_fence_put(struct user_fence *ufence)
+static void user_fence_put(struct xe_user_fence *ufence)
 {
 	kref_put(&ufence->refcount, user_fence_destroy);
 }
-static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
-					    u64 value)
+static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr,
					       u64 value)
 {
-	struct user_fence *ufence;
+	struct xe_user_fence *ufence;
 	ufence = kmalloc(sizeof(*ufence), GFP_KERNEL);
 	if (!ufence)
@@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr,
 static void user_fence_worker(struct work_struct *w)
 {
-	struct user_fence *ufence = container_of(w, struct user_fence, worker);
+	struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
 	if (mmget_not_zero(ufence->mm)) {
 		kthread_use_mm(ufence->mm);
@@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w)
 	}
 	wake_up_all(&ufence->xe->ufence_wq);
+	WRITE_ONCE(ufence->signalled, 1);
 	user_fence_put(ufence);
 }
-static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
+static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence)
 {
 	INIT_WORK(&ufence->worker, user_fence_worker);
 	queue_work(ufence->xe->ordered_wq, &ufence->worker);
@@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence)
 static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
-	struct user_fence *ufence = container_of(cb, struct user_fence, cb);
+	struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb);
 	kick_ufence(ufence, fence);
 }
@@ -340,3 +342,39 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
 	return ERR_PTR(-ENOMEM);
 }
+/**
+ * xe_sync_ufence_get() - Get user fence from sync
+ * @sync: input sync
+ *
+ * Get a user fence reference from sync.
+ *
+ * Return: xe_user_fence pointer with reference
+ */
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync)
+{
+	user_fence_get(sync->ufence);
+	return sync->ufence;
+}
+/**
+ * xe_sync_ufence_put() - Put user fence reference
+ * @ufence: user fence reference
+ *
+ */
+void xe_sync_ufence_put(struct xe_user_fence *ufence)
+{
+	user_fence_put(ufence);
+}
+/**
+ * xe_sync_ufence_get_status() - Get user fence status
+ * @ufence: user fence
+ *
+ * Return: 1 if signalled, 0 not signalled, <0 on error
+ */
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence)
+{
+	return READ_ONCE(ufence->signalled);
+}
@@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync)
 	return !!sync->ufence;
 }
+struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync);
+void xe_sync_ufence_put(struct xe_user_fence *ufence);
+int xe_sync_ufence_get_status(struct xe_user_fence *ufence);
 #endif
@@ -18,7 +18,7 @@ struct xe_sync_entry {
 	struct drm_syncobj *syncobj;
 	struct dma_fence *fence;
 	struct dma_fence_chain *chain_fence;
-	struct user_fence *ufence;
+	struct xe_user_fence *ufence;
 	u64 addr;
 	u64 timeline_value;
 	u32 type;
......
@@ -12,6 +12,7 @@
 #include <linux/tracepoint.h>
 #include <linux/types.h>
+#include "xe_bo.h"
 #include "xe_bo_types.h"
 #include "xe_exec_queue_types.h"
 #include "xe_gpu_scheduler_types.h"
@@ -26,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
 		    TP_ARGS(fence),
 		    TP_STRUCT__entry(
-			     __field(u64, fence)
+			     __field(struct xe_gt_tlb_invalidation_fence *, fence)
 			     __field(int, seqno)
 			     ),
 		    TP_fast_assign(
-			   __entry->fence = (u64)fence;
+			   __entry->fence = fence;
 			   __entry->seqno = fence->seqno;
 			   ),
-		    TP_printk("fence=0x%016llx, seqno=%d",
+		    TP_printk("fence=%p, seqno=%d",
 			      __entry->fence, __entry->seqno)
 );
@@ -82,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo,
 		    TP_STRUCT__entry(
 			     __field(size_t, size)
 			     __field(u32, flags)
-			     __field(u64, vm)
+			     __field(struct xe_vm *, vm)
 			     ),
 		    TP_fast_assign(
 			   __entry->size = bo->size;
 			   __entry->flags = bo->flags;
-			   __entry->vm = (unsigned long)bo->vm;
+			   __entry->vm = bo->vm;
 			   ),
-		    TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx",
+		    TP_printk("size=%zu, flags=0x%02x, vm=%p",
 			      __entry->size, __entry->flags, __entry->vm)
 );
@@ -100,9 +101,31 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault,
 	     TP_ARGS(bo)
 );
-DEFINE_EVENT(xe_bo, xe_bo_move,
-	     TP_PROTO(struct xe_bo *bo),
-	     TP_ARGS(bo)
+TRACE_EVENT(xe_bo_move,
+	    TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement,
+		     bool move_lacks_source),
+	    TP_ARGS(bo, new_placement, old_placement, move_lacks_source),
+	    TP_STRUCT__entry(
+			     __field(struct xe_bo *, bo)
+			     __field(size_t, size)
+			     __field(u32, new_placement)
+			     __field(u32, old_placement)
+			     __array(char, device_id, 12)
+			     __field(bool, move_lacks_source)
+			     ),
+	    TP_fast_assign(
+		   __entry->bo = bo;
+		   __entry->size = bo->size;
+		   __entry->new_placement = new_placement;
+		   __entry->old_placement = old_placement;
+		   strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12);
+		   __entry->move_lacks_source = move_lacks_source;
+		   ),
+	    TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s",
+		      __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size,
+		      xe_mem_type_to_name[__entry->old_placement],
+		      xe_mem_type_to_name[__entry->new_placement], __entry->device_id)
 );
 DECLARE_EVENT_CLASS(xe_exec_queue,
@@ -327,16 +350,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
 		    TP_STRUCT__entry(
 			     __field(u64, ctx)
 			     __field(u32, seqno)
-			     __field(u64, fence)
+			     __field(struct xe_hw_fence *, fence)
 			     ),
 		    TP_fast_assign(
 			   __entry->ctx = fence->dma.context;
 			   __entry->seqno = fence->dma.seqno;
-			   __entry->fence = (unsigned long)fence;
+			   __entry->fence = fence;
 			   ),
-		    TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u",
+		    TP_printk("ctx=0x%016llx, fence=%p, seqno=%u",
 			      __entry->ctx, __entry->fence, __entry->seqno)
 );
@@ -365,7 +388,7 @@ DECLARE_EVENT_CLASS(xe_vma,
 		    TP_ARGS(vma),
 		    TP_STRUCT__entry(
-			     __field(u64, vma)
+			     __field(struct xe_vma *, vma)
 			     __field(u32, asid)
 			     __field(u64, start)
 			     __field(u64, end)
@@ -373,14 +396,14 @@
 			     ),
 		    TP_fast_assign(
-			   __entry->vma = (unsigned long)vma;
+			   __entry->vma = vma;
 			   __entry->asid = xe_vma_vm(vma)->usm.asid;
 			   __entry->start = xe_vma_start(vma);
 			   __entry->end = xe_vma_end(vma) - 1;
 			   __entry->ptr = xe_vma_userptr(vma);
 			   ),
-		    TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,",
+		    TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,",
 			      __entry->vma, __entry->asid, __entry->start,
 			      __entry->end, __entry->ptr)
 )
@@ -465,16 +488,16 @@ DECLARE_EVENT_CLASS(xe_vm,
 		    TP_ARGS(vm),
 		    TP_STRUCT__entry(
-			     __field(u64, vm)
+			     __field(struct xe_vm *, vm)
 			     __field(u32, asid)
 			     ),
 		    TP_fast_assign(
-			   __entry->vm = (unsigned long)vm;
+			   __entry->vm = vm;
 			   __entry->asid = vm->usm.asid;
 			   ),
-		    TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm,
+		    TP_printk("vm=%p, asid=0x%05x", __entry->vm,
 			      __entry->asid)
 );
......
@@ -897,6 +897,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 	struct xe_device *xe = vm->xe;
 	bool read_only = xe_vma_read_only(vma);
+	if (vma->ufence) {
+		xe_sync_ufence_put(vma->ufence);
+		vma->ufence = NULL;
+	}
 	if (xe_vma_is_userptr(vma)) {
 		struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
@@ -1608,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	trace_xe_vma_unbind(vma);
+	if (vma->ufence) {
+		struct xe_user_fence * const f = vma->ufence;
+		if (!xe_sync_ufence_get_status(f))
+			return ERR_PTR(-EBUSY);
+		vma->ufence = NULL;
+		xe_sync_ufence_put(f);
+	}
 	if (number_tiles > 1) {
 		fences = kmalloc_array(number_tiles, sizeof(*fences),
 				       GFP_KERNEL);
@@ -1741,6 +1756,21 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 	return ERR_PTR(err);
 }
+static struct xe_user_fence *
+find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
+{
+	unsigned int i;
+	for (i = 0; i < num_syncs; i++) {
+		struct xe_sync_entry *e = &syncs[i];
+		if (xe_sync_is_ufence(e))
+			return xe_sync_ufence_get(e);
+	}
+	return NULL;
+}
 static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
			u32 num_syncs, bool immediate, bool first_op,
@@ -1748,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 {
 	struct dma_fence *fence;
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
+	struct xe_user_fence *ufence;
 	xe_vm_assert_held(vm);
+	ufence = find_ufence_get(syncs, num_syncs);
+	if (vma->ufence && ufence)
+		xe_sync_ufence_put(vma->ufence);
+	vma->ufence = ufence ?: vma->ufence;
 	if (immediate) {
 		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op,
				       last_op);
@@ -2117,10 +2154,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 		if (__op->op == DRM_GPUVA_OP_MAP) {
-			op->map.immediate =
-				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
-			op->map.read_only =
-				flags & DRM_XE_VM_BIND_FLAG_READONLY;
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
 			op->map.pat_index = pat_index;
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
@@ -2313,8 +2346,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		switch (op->base.op) {
 		case DRM_GPUVA_OP_MAP:
 		{
-			flags |= op->map.read_only ?
-				VMA_CREATE_FLAG_READ_ONLY : 0;
 			flags |= op->map.is_null ?
 				VMA_CREATE_FLAG_IS_NULL : 0;
@@ -2445,7 +2476,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 	case DRM_GPUVA_OP_MAP:
 		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
 				 op->syncs, op->num_syncs,
-				 op->map.immediate || !xe_vm_in_fault_mode(vm),
+				 !xe_vm_in_fault_mode(vm),
 				 op->flags & XE_VMA_OP_FIRST,
 				 op->flags & XE_VMA_OP_LAST);
 		break;
@@ -2720,14 +2751,11 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 	return 0;
 }
-#define SUPPORTED_FLAGS \
-	(DRM_XE_VM_BIND_FLAG_READONLY | \
-	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL)
+#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \
+			 DRM_XE_VM_BIND_FLAG_DUMPABLE)
 #define XE_64K_PAGE_MASK 0xffffull
 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
-#define MAX_BINDS 512 /* FIXME: Picking random upper limit */
 static int vm_bind_ioctl_check_args(struct xe_device *xe,
				    struct drm_xe_vm_bind *args,
				    struct drm_xe_vm_bind_op **bind_ops)
@@ -2739,16 +2767,16 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;
-	if (XE_IOCTL_DBG(xe, args->extensions) ||
-	    XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS))
+	if (XE_IOCTL_DBG(xe, args->extensions))
		return -EINVAL;
	if (args->num_binds > 1) {
		u64 __user *bind_user =
			u64_to_user_ptr(args->vector_of_binds);
-		*bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) *
-				    args->num_binds, GFP_KERNEL);
+		*bind_ops = kvmalloc_array(args->num_binds,
+					   sizeof(struct drm_xe_vm_bind_op),
+					   GFP_KERNEL | __GFP_ACCOUNT);
		if (!*bind_ops)
			return -ENOMEM;
@@ -2838,7 +2866,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
 free_bind_ops:
	if (args->num_binds > 1)
-		kfree(*bind_ops);
+		kvfree(*bind_ops);
	return err;
 }
@@ -2926,13 +2954,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}
 	if (args->num_binds) {
-		bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL);
+		bos = kvcalloc(args->num_binds, sizeof(*bos),
+			       GFP_KERNEL | __GFP_ACCOUNT);
 		if (!bos) {
 			err = -ENOMEM;
 			goto release_vm_lock;
 		}
-		ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL);
+		ops = kvcalloc(args->num_binds, sizeof(*ops),
+			       GFP_KERNEL | __GFP_ACCOUNT);
 		if (!ops) {
 			err = -ENOMEM;
 			goto release_vm_lock;
@@ -3073,10 +3103,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	for (i = 0; bos && i < args->num_binds; ++i)
 		xe_bo_put(bos[i]);
-	kfree(bos);
-	kfree(ops);
+	kvfree(bos);
+	kvfree(ops);
 	if (args->num_binds > 1)
-		kfree(bind_ops);
+		kvfree(bind_ops);
 	return err;
@@ -3100,10 +3130,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (q)
 		xe_exec_queue_put(q);
 free_objs:
-	kfree(bos);
-	kfree(ops);
+	kvfree(bos);
+	kvfree(ops);
 	if (args->num_binds > 1)
-		kfree(bind_ops);
+		kvfree(bind_ops);
 	return err;
 }
......
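
The xe_vm.c hunks above are what enforces the "BINDs completed before accepting UNBINDs" rule from the changelog: an unbind now returns -EBUSY while the user fence attached to the previous bind is still unsignalled. A minimal userspace sketch of the intended flow follows; it is not part of this merge, the helper name and local fence storage are made up for illustration, and the drm_xe_sync / drm_xe_wait_user_fence field usage should be double-checked against the kernel's xe_drm.h.

```c
/* Illustrative sketch only: attach a user fence to a VM_BIND and wait for
 * it to signal before issuing the corresponding unbind, so the new -EBUSY
 * check in xe_vm_unbind_vma() is never hit. 'ufence' must stay mapped.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static int bind_and_wait(int fd, struct drm_xe_vm_bind *bind, uint64_t *ufence)
{
	struct drm_xe_sync sync = {
		.type = DRM_XE_SYNC_TYPE_USER_FENCE,
		.flags = DRM_XE_SYNC_FLAG_SIGNAL,
		.addr = (uint64_t)(uintptr_t)ufence,
		.timeline_value = 1,	/* value written once the bind completes */
	};
	struct drm_xe_wait_user_fence wait = {
		.addr = (uint64_t)(uintptr_t)ufence,
		.op = DRM_XE_UFENCE_WAIT_OP_EQ,
		.value = 1,
		.mask = DRM_XE_UFENCE_WAIT_MASK_U64,
		.timeout = 1000 * 1000 * 1000,	/* 1 s, in ns */
	};
	int ret;

	*ufence = 0;
	bind->num_syncs = 1;
	bind->syncs = (uint64_t)(uintptr_t)&sync;

	ret = ioctl(fd, DRM_IOCTL_XE_VM_BIND, bind);
	if (ret)
		return ret;

	/* Only after this wait returns is an UNBIND of the same range safe
	 * from the -EBUSY path added above. */
	return ioctl(fd, DRM_IOCTL_XE_WAIT_USER_FENCE, &wait);
}
```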
@@ -19,6 +19,7 @@
 struct xe_bo;
 struct xe_sync_entry;
+struct xe_user_fence;
 struct xe_vm;
 #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS
@@ -104,6 +105,12 @@ struct xe_vma {
	 * @pat_index: The pat index to use when encoding the PTEs for this vma.
	 */
	u16 pat_index;
+	/**
+	 * @ufence: The user fence that was provided with MAP.
+	 * Needs to be signalled before UNMAP can be processed.
+	 */
+	struct xe_user_fence *ufence;
 };
 /**
@@ -288,10 +295,6 @@ struct xe_vm {
 struct xe_vma_op_map {
	/** @vma: VMA to map */
	struct xe_vma *vma;
-	/** @immediate: Immediate bind */
-	bool immediate;
-	/** @read_only: Read only */
-	bool read_only;
	/** @is_null: is NULL binding */
	bool is_null;
	/** @pat_index: The pat index to use for this operation. */
......
@@ -831,11 +831,6 @@ struct drm_xe_vm_destroy {
 *  - %DRM_XE_VM_BIND_OP_PREFETCH
 *
 * and the @flags can be:
- *  - %DRM_XE_VM_BIND_FLAG_READONLY
- *  - %DRM_XE_VM_BIND_FLAG_ASYNC
- *  - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the
- *    MAP operation immediately rather than deferring the MAP to the page
- *    fault handler.
 *  - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page
 *    tables are setup with a special bit which indicates writes are
 *    dropped and all reads return zero. In the future, the NULL flags
@@ -928,9 +923,8 @@ struct drm_xe_vm_bind_op {
 	/** @op: Bind operation to perform */
 	__u32 op;
-#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
-#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
 #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
+#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
 	/** @flags: Bind flags */
 	__u32 flags;
@@ -1045,19 +1039,6 @@ struct drm_xe_exec_queue_create {
 #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6
-#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7
-/* Monitor 128KB contiguous region with 4K sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_128K 0
-/* Monitor 2MB contiguous region with 64KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_2M 1
-/* Monitor 16MB contiguous region with 512KB sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_16M 2
-/* Monitor 64MB contiguous region with 2M sub-granularity */
-#define DRM_XE_ACC_GRANULARITY_64M 3
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
......