Commit ed29c269 authored by Maarten Lankhorst, committed by Daniel Vetter

drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.

Instead of doing what we do currently, which will never work with
PROVE_LOCKING, do the same as AMD does, similar to the relocation
slowpath: while all locks are dropped, we acquire the pages for
pinning. Once the locks are taken, we transfer those pages to the bo
in .get_pages(). As a final check before installing the fences, we
ensure that the mmu notifier was not called; if it was, we return
-EAGAIN to userspace to signal it has to start over.
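
In other words, this is the standard mmu_interval_notifier begin/retry
pattern: record the notifier sequence and pin the pages while no locks
are held, then, under the notifier lock and just before the fences are
installed, check whether an invalidation raced with the pinning. A
minimal sketch of that pattern follows; the struct and function names
(userptr_sketch, userptr_sketch_submit_*) are illustrative stand-ins,
not the actual i915 helpers (those are
i915_gem_object_userptr_submit_init()/_done() in the diff below).

#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

/* Illustrative stand-in for the userptr state; not the i915 structs. */
struct userptr_sketch {
	struct mmu_interval_notifier notifier;
	unsigned long notifier_seq;
	struct page **pvec;
	unsigned int npages;
};

/* "submit_init" step: runs with all object/ww locks dropped. */
static int userptr_sketch_submit_init(struct userptr_sketch *p,
				      unsigned long uaddr)
{
	int pinned;

	/* Record the notifier sequence before touching the pages. */
	p->notifier_seq = mmu_interval_read_begin(&p->notifier);

	p->pvec = kvmalloc_array(p->npages, sizeof(*p->pvec), GFP_KERNEL);
	if (!p->pvec)
		return -ENOMEM;

	pinned = pin_user_pages_fast(uaddr, p->npages, FOLL_WRITE, p->pvec);
	if (pinned < 0) {
		kvfree(p->pvec);
		p->pvec = NULL;
		return pinned;
	}
	if (pinned != p->npages) {
		unpin_user_pages(p->pvec, pinned);
		kvfree(p->pvec);
		p->pvec = NULL;
		return -EFAULT;
	}
	return 0;
}

/*
 * "submit_done" step: called with the notifier spinlock held, right
 * before the fences are installed.  If the range was invalidated while
 * we were pinning, hand -EAGAIN back so the submission is restarted.
 */
static int userptr_sketch_submit_done(struct userptr_sketch *p)
{
	if (mmu_interval_read_retry(&p->notifier, p->notifier_seq))
		return -EAGAIN;
	return 0;
}

In the patch itself the init step is called from eb_lookup_vmas() and
eb_reinit_userptr(), and the done step from eb_move_to_gpu() under the
new i915->mm.notifier_lock, as visible in the hunks below.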

Changes since v1:
- Unbinding is done in submit_init only. submit_begin() removed.
- MMU_NOTFIER -> MMU_NOTIFIER
Changes since v2:
- Make i915->mm.notifier a spinlock.
Changes since v3:
- Add WARN_ON if there are any page references left, should have been 0.
- Return 0 on success in submit_init(), bug from spinlock conversion.
- Release pvec outside of notifier_lock (Thomas).
Changes since v4:
- Mention why we're clearing eb->vma[i + 1].vma in the code. (Thomas)
- Actually check all invalidations in eb_move_to_gpu. (Thomas)
- Do not wait when process is exiting to fix gem_ctx_persistence.userptr.
Changes since v5:
- Clarify why check on PF_EXITING is (temporarily) required.
Changes since v6:
- Ensure userptr validity is checked in set_domain through a special path.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
[danvet: s/kfree/kvfree/ in i915_gem_object_userptr_drop_ref, which was
requested in the previous review round but got lost. The other open
questions around page refcount are imo better discussed in a separate
series, with amdgpu folks involved].
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210323155059.628690-17-maarten.lankhorst@linux.intel.com
parent 20ee27bd
@@ -533,14 +533,28 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (err)
goto out;
if (i915_gem_object_is_userptr(obj)) {
/*
* Try to grab userptr pages, iris uses set_domain to check
* userptr validity
*/
err = i915_gem_object_userptr_validate(obj);
if (!err)
err = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_PRIORITY |
(write_domain ? I915_WAIT_ALL : 0),
MAX_SCHEDULE_TIMEOUT);
goto out;
}
/*
* Proxy objects do not control access to the backing storage, ergo
* they cannot be used as a means to manipulate the cache domain
* tracking for that backing storage. The proxy object is always
* considered to be outside of any cache domain.
*/
if (i915_gem_object_is_proxy(obj) &&
!i915_gem_object_is_userptr(obj)) {
if (i915_gem_object_is_proxy(obj)) {
err = -ENXIO;
goto out;
}
......
@@ -53,14 +53,16 @@ enum {
/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
#define __EXEC_OBJECT_HAS_PIN BIT(30)
#define __EXEC_OBJECT_HAS_FENCE BIT(29)
#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above + */
#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_ENGINE_PINNED BIT(30)
#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define __EXEC_USERPTR_USED BIT(29)
#define __EXEC_INTERNAL_FLAGS (~0u << 29)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
@@ -871,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
eb_add_vma(eb, i, batch, vma);
if (i915_gem_object_is_userptr(vma->obj)) {
err = i915_gem_object_userptr_submit_init(vma->obj);
if (err) {
if (i + 1 < eb->buffer_count) {
/*
* Execbuffer code expects last vma entry to be NULL,
* since we already initialized this entry,
* set the next value to NULL or we mess up
* cleanup handling.
*/
eb->vma[i + 1].vma = NULL;
}
return err;
}
eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
eb->args->flags |= __EXEC_USERPTR_USED;
}
}
if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
@@ -972,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
}
}
static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
@@ -986,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
eb_unreserve_vma(ev);
if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
i915_gem_object_userptr_submit_fini(vma->obj);
}
if (final)
i915_vma_put(vma);
}
@@ -1923,6 +1950,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
return 0;
}
static int eb_reinit_userptr(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
int ret;
if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
return 0;
for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];
if (!i915_gem_object_is_userptr(ev->vma->obj))
continue;
ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
if (ret)
return ret;
ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
}
return 0;
}
static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
struct i915_request *rq)
{
@@ -1937,7 +1989,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
}
/* We may process another execbuffer during the unlock... */
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, true);
i915_gem_ww_ctx_fini(&eb->ww);
if (rq) {
@@ -1978,10 +2030,8 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
err = 0;
}
#ifdef CONFIG_MMU_NOTIFIER
if (!err)
flush_workqueue(eb->i915->mm.userptr_wq);
#endif
err = eb_reinit_userptr(eb);
err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
@@ -2043,7 +2093,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
err:
if (err == -EDEADLK) {
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto repeat_validate;
@@ -2140,7 +2190,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
err:
if (err == -EDEADLK) {
eb_release_vmas(eb, false);
eb_release_vmas(eb, false, false);
err = i915_gem_ww_ctx_backoff(&eb->ww);
if (!err)
goto retry;
@@ -2215,6 +2265,30 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
flags | __EXEC_OBJECT_NO_RESERVE);
}
#ifdef CONFIG_MMU_NOTIFIER
if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
spin_lock(&eb->i915->mm.notifier_lock);
/*
* count is always at least 1, otherwise __EXEC_USERPTR_USED
* could not have been set
*/
for (i = 0; i < count; i++) {
struct eb_vma *ev = &eb->vma[i];
struct drm_i915_gem_object *obj = ev->vma->obj;
if (!i915_gem_object_is_userptr(obj))
continue;
err = i915_gem_object_userptr_submit_done(obj);
if (err)
break;
}
spin_unlock(&eb->i915->mm.notifier_lock);
}
#endif
if (unlikely(err))
goto err_skip;
@@ -3359,7 +3433,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_lookup_vmas(&eb);
if (err) {
eb_release_vmas(&eb, true);
eb_release_vmas(&eb, true, true);
goto err_engine;
}
@@ -3431,6 +3505,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
err_request:
i915_request_get(eb.request);
err = eb_request_add(&eb, err);
@@ -3451,7 +3526,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
i915_request_put(eb.request);
err_vma:
eb_release_vmas(&eb, true);
eb_release_vmas(&eb, true, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
......
@@ -33,6 +33,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
const void *data, resource_size_t size);
extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
struct sg_table *pages,
bool needs_clflush);
@@ -252,12 +253,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
}
static inline bool
i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
}
static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
@@ -548,16 +543,6 @@ void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
enum fb_op_origin origin);
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
#ifdef CONFIG_MMU_NOTIFIER
return obj->userptr.mm;
#else
return false;
#endif
}
static inline void
i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
enum fb_op_origin origin)
@@ -578,4 +563,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);
#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
{
return obj->userptr.notifier.mm;
}
int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
#else
static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }
static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
#endif
#endif
@@ -7,6 +7,8 @@
#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__
#include <linux/mmu_notifier.h>
#include <drm/drm_gem.h>
#include <uapi/drm/i915_drm.h>
@@ -34,7 +36,6 @@ struct drm_i915_gem_object_ops {
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2)
#define I915_GEM_OBJECT_IS_PROXY BIT(3)
#define I915_GEM_OBJECT_NO_MMAP BIT(4)
#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -293,10 +294,11 @@ struct drm_i915_gem_object {
#ifdef CONFIG_MMU_NOTIFIER
struct i915_gem_userptr {
uintptr_t ptr;
unsigned long notifier_seq;
struct i915_mm_struct *mm;
struct i915_mmu_object *mmu_object;
struct work_struct *work;
struct mmu_interval_notifier notifier;
struct page **pvec;
int page_ref;
} userptr;
#endif
......
@@ -226,7 +226,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
* get_pages backends we should be better able to handle the
* cancellation of the async task in a more uniform manner.
*/
if (!pages && !i915_gem_object_needs_async_cancel(obj))
if (!pages)
pages = ERR_PTR(-EINVAL);
if (!IS_ERR(pages))
......
This diff is collapsed.
@@ -556,11 +556,10 @@ struct i915_gem_mm {
#ifdef CONFIG_MMU_NOTIFIER
/**
* Workqueue to fault in userptr pages, flushed by the execbuf
* when required but otherwise left to userspace to try again
* on EAGAIN.
* notifier_lock for mmu notifiers, memory may not be allocated
* while holding this lock.
*/
struct workqueue_struct *userptr_wq;
spinlock_t notifier_lock;
#endif
/* shrinker accounting, also useful for userland debugging */
@@ -940,8 +939,6 @@ struct drm_i915_private {
struct i915_ggtt ggtt; /* VM representing the global address space */
struct i915_gem_mm mm;
DECLARE_HASHTABLE(mm_structs, 7);
spinlock_t mm_lock;
/* Kernel Modesetting */
......
@@ -1075,10 +1075,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_unlock:
i915_gem_drain_workqueue(dev_priv);
if (ret != -EIO) {
if (ret != -EIO)
intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
i915_gem_cleanup_userptr(dev_priv);
}
if (ret == -EIO) {
/*
@@ -1135,7 +1133,6 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv)
intel_wa_list_free(&dev_priv->gt_wa_list);
intel_uc_cleanup_firmwares(&dev_priv->gt.uc);
i915_gem_cleanup_userptr(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
......