Commit b9d126e7 authored by Chris Wilson's avatar Chris Wilson

drm/i915: Remove partial attempt to swizzle on pread/pwrite

Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
objects was flawed due to the simple fact that we do not always know the
swizzling for a particular page (due to the swizzling varying based on
location in certain unbalanced configurations). Furthermore, the
pread/pwrite paths are now unbalanced in that we are required to use the
GTT as in some cases we do not have direct CPU access to the backing
physical pages (thus some paths trying to account for the swizzle, but
others neglecting, chaos ensues).

There are no known users who do use pread/pwrite into a tiled object
(you need to manually detile anyway, so why now just use mmap and avoid
the copy?) and no user bug reports to indicate that it is being used in
the wild. As no one is hitting the buggy path, we can just remove the
buggy code.

v2: Just use the fault allowing kmap() + normal copy_(to|from)_user
v3: Avoid int overflow in computing 'length' from 'remain' (Tvrtko)

References: fe115628 ("drm/i915: Implement pwrite without struct-mutex")
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: default avatarKenneth Graunke <kenneth@whitecape.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20190105120758.9237-1-chris@chris-wilson.co.uk
parent 55c15512
......@@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
obj->write_domain = 0;
}
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
const char *gpu_vaddr, int gpu_offset,
int length)
{
int ret, cpu_offset = 0;
while (length > 0) {
int cacheline_end = ALIGN(gpu_offset + 1, 64);
int this_length = min(cacheline_end - gpu_offset, length);
int swizzled_gpu_offset = gpu_offset ^ 64;
ret = __copy_to_user(cpu_vaddr + cpu_offset,
gpu_vaddr + swizzled_gpu_offset,
this_length);
if (ret)
return ret + length;
cpu_offset += this_length;
gpu_offset += this_length;
length -= this_length;
}
return 0;
}
static inline int
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
const char __user *cpu_vaddr,
int length)
{
int ret, cpu_offset = 0;
while (length > 0) {
int cacheline_end = ALIGN(gpu_offset + 1, 64);
int this_length = min(cacheline_end - gpu_offset, length);
int swizzled_gpu_offset = gpu_offset ^ 64;
ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
cpu_vaddr + cpu_offset,
this_length);
if (ret)
return ret + length;
cpu_offset += this_length;
gpu_offset += this_length;
length -= this_length;
}
return 0;
}
/*
* Pins the specified object's pages and synchronizes the object with
* GPU accesses. Sets needs_clflush to non-zero if the caller should
......@@ -1030,72 +978,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
return ret;
}
static void
shmem_clflush_swizzled_range(char *addr, unsigned long length,
bool swizzled)
{
if (unlikely(swizzled)) {
unsigned long start = (unsigned long) addr;
unsigned long end = (unsigned long) addr + length;
/* For swizzling simply ensure that we always flush both
* channels. Lame, but simple and it works. Swizzled
* pwrite/pread is far from a hotpath - current userspace
* doesn't use it at all. */
start = round_down(start, 128);
end = round_up(end, 128);
drm_clflush_virt_range((void *)start, end - start);
} else {
drm_clflush_virt_range(addr, length);
}
}
/* Only difference to the fast-path function is that this can handle bit17
* and uses non-atomic copy and kmap functions. */
static int
shmem_pread_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling, bool needs_clflush)
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
bool needs_clflush)
{
char *vaddr;
int ret;
vaddr = kmap(page);
if (needs_clflush)
shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
else
ret = __copy_to_user(user_data, vaddr + offset, length);
kunmap(page);
return ret ? - EFAULT : 0;
}
static int
shmem_pread(struct page *page, int offset, int length, char __user *user_data,
bool page_do_bit17_swizzling, bool needs_clflush)
{
int ret;
if (needs_clflush)
drm_clflush_virt_range(vaddr + offset, len);
ret = -ENODEV;
if (!page_do_bit17_swizzling) {
char *vaddr = kmap_atomic(page);
ret = __copy_to_user(user_data, vaddr + offset, len);
if (needs_clflush)
drm_clflush_virt_range(vaddr + offset, length);
ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
kunmap_atomic(vaddr);
}
if (ret == 0)
return 0;
kunmap(page);
return shmem_pread_slow(page, offset, length, user_data,
page_do_bit17_swizzling, needs_clflush);
return ret ? -EFAULT : 0;
}
static int
......@@ -1104,15 +1003,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
{
char __user *user_data;
u64 remain;
unsigned int obj_do_bit17_swizzling;
unsigned int needs_clflush;
unsigned int idx, offset;
int ret;
obj_do_bit17_swizzling = 0;
if (i915_gem_object_needs_bit17_swizzle(obj))
obj_do_bit17_swizzling = BIT(17);
ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
if (ret)
return ret;
......@@ -1130,7 +1024,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
ret = shmem_pread(page, offset, length, user_data,
page_to_phys(page) & obj_do_bit17_swizzling,
needs_clflush);
if (ret)
break;
......@@ -1471,33 +1364,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
return ret;
}
static int
shmem_pwrite_slow(struct page *page, int offset, int length,
char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
bool needs_clflush_after)
{
char *vaddr;
int ret;
vaddr = kmap(page);
if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
if (page_do_bit17_swizzling)
ret = __copy_from_user_swizzled(vaddr, offset, user_data,
length);
else
ret = __copy_from_user(vaddr + offset, user_data, length);
if (needs_clflush_after)
shmem_clflush_swizzled_range(vaddr + offset, length,
page_do_bit17_swizzling);
kunmap(page);
return ret ? -EFAULT : 0;
}
/* Per-page copy function for the shmem pwrite fastpath.
* Flushes invalid cachelines before writing to the target if
* needs_clflush_before is set and flushes out any written cachelines after
......@@ -1505,31 +1371,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
*/
static int
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
bool page_do_bit17_swizzling,
bool needs_clflush_before,
bool needs_clflush_after)
{
char *vaddr;
int ret;
ret = -ENODEV;
if (!page_do_bit17_swizzling) {
char *vaddr = kmap_atomic(page);
vaddr = kmap(page);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + offset, len);
ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
if (needs_clflush_after)
drm_clflush_virt_range(vaddr + offset, len);
if (needs_clflush_before)
drm_clflush_virt_range(vaddr + offset, len);
kunmap_atomic(vaddr);
}
if (ret == 0)
return ret;
ret = __copy_from_user(vaddr + offset, user_data, len);
if (!ret && needs_clflush_after)
drm_clflush_virt_range(vaddr + offset, len);
kunmap(page);
return shmem_pwrite_slow(page, offset, len, user_data,
page_do_bit17_swizzling,
needs_clflush_before,
needs_clflush_after);
return ret ? -EFAULT : 0;
}
static int
......@@ -1539,7 +1398,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
struct drm_i915_private *i915 = to_i915(obj->base.dev);
void __user *user_data;
u64 remain;
unsigned int obj_do_bit17_swizzling;
unsigned int partial_cacheline_write;
unsigned int needs_clflush;
unsigned int offset, idx;
......@@ -1554,10 +1412,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
if (ret)
return ret;
obj_do_bit17_swizzling = 0;
if (i915_gem_object_needs_bit17_swizzle(obj))
obj_do_bit17_swizzling = BIT(17);
/* If we don't overwrite a cacheline completely we need to be
* careful to have up-to-date data by first clflushing. Don't
* overcomplicate things and flush the entire patch.
......@@ -1574,7 +1428,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
ret = shmem_pwrite(page, offset, length, user_data,
page_to_phys(page) & obj_do_bit17_swizzling,
(offset | length) & partial_cacheline_write,
needs_clflush & CLFLUSH_AFTER);
if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment