Commit 3bdd4f84 authored by Chris Wilson

drm/i915: Rely on spinlock protection for GPU error capture

Trust that we now have adequate protection over the low level structures
via the engine->active.lock to allow ourselves to capture the GPU error
state without the heavy hammer of stop_machine(). Sadly this does mean
that we have to forgo some of the lesser used information (not derived
from the active state) that is not controlled by the active locks. This
includes the list of buffers in the ppGTT and pinned globally in the
GGTT. Originally this was used to manually verify relocations, but
hasn't been required for some time, and modern Mesa now has the habit of
ensuring that all interesting buffers within a batch are captured in their
entirety (that is, the auxiliary state buffers, but not the textures).

A useful side-effect is that this allows us to restore error capturing
for Braswell and Broxton.

v2: Use pagevec for a typical arbitrary number of preallocated pages
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190722222847.24178-1-chris@chris-wilson.co.uk
parent df8cf31e
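For orientation before the diff, here is a condensed sketch of the v2 page-pool idea. It mirrors the pool_refill()/pool_alloc() helpers added to i915_gpu_error.c below, trimmed for illustration rather than copied verbatim: the stash is topped up with sleeping ALLOW_FAIL allocations before the engine->active.lock is taken, and inside the lock only ATOMIC_MAYFAIL allocations are attempted, falling back to the reserve.

	/* Sketch only: simplified from the pool_* helpers in the patch below. */
	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/pagevec.h>

	#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
	#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)

	/* Fill the reserved stash while we are still allowed to sleep. */
	static int pool_refill(struct pagevec *pv, gfp_t gfp)
	{
		while (pagevec_space(pv)) {
			struct page *p = alloc_page(gfp);

			if (!p)
				return -ENOMEM;
			pagevec_add(pv, p);
		}
		return 0;
	}

	/* Under the engine lock: try an atomic allocation, else dip into the stash. */
	static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
	{
		struct page *p = alloc_page(gfp);

		if (!p && pagevec_count(pv))
			p = pv->pages[--pv->nr];
		return p ? page_address(p) : NULL;
	}

Keeping a small preallocated reserve means the capture path never has to sleep under the spinlock, yet can still ride out transient atomic-allocation failures.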
@@ -2970,11 +2970,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
 		if (ggtt->vm.clear_range != nop_clear_range)
 			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
-
-		/* Prevent recursively calling stop_machine() and deadlocks. */
-		dev_info(dev_priv->drm.dev,
-			 "Disabling error capture for VT-d workaround\n");
-		i915_disable_error_state(dev_priv, -ENODEV);
 	}

 	ggtt->invalidate = gen6_ggtt_invalidate;
...
@@ -29,8 +29,8 @@
 #include <linux/ascii85.h>
 #include <linux/nmi.h>
+#include <linux/pagevec.h>
 #include <linux/scatterlist.h>
-#include <linux/stop_machine.h>
 #include <linux/utsname.h>
 #include <linux/zlib.h>
@@ -46,6 +46,9 @@
 #include "i915_scatterlist.h"
 #include "intel_csr.h"

+#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
+
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
 {
@@ -67,26 +70,6 @@ engine_name(const struct drm_i915_private *i915, unsigned int id)
 	return __engine_name(engine_lookup(i915, id));
 }

-static const char *tiling_flag(int tiling)
-{
-	switch (tiling) {
-	default:
-	case I915_TILING_NONE: return "";
-	case I915_TILING_X: return " X";
-	case I915_TILING_Y: return " Y";
-	}
-}
-
-static const char *dirty_flag(int dirty)
-{
-	return dirty ? " dirty" : "";
-}
-
-static const char *purgeable_flag(int purgeable)
-{
-	return purgeable ? " purgeable" : "";
-}
-
 static void __sg_set_buf(struct scatterlist *sg,
 			 void *addr, unsigned int len, loff_t it)
 {
@@ -114,7 +97,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 	if (e->cur == e->end) {
 		struct scatterlist *sgl;

-		sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
+		sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL);
 		if (!sgl) {
 			e->err = -ENOMEM;
 			return false;
@@ -134,7 +117,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 	}

 	e->size = ALIGN(len + 1, SZ_64K);
-	e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	e->buf = kmalloc(e->size, ALLOW_FAIL);
 	if (!e->buf) {
 		e->size = PAGE_ALIGN(len + 1);
 		e->buf = kmalloc(e->size, GFP_KERNEL);
@@ -211,47 +194,115 @@ i915_error_printer(struct drm_i915_error_state_buf *e)
 	return p;
 }

+/* single threaded page allocator with a reserved stash for emergencies */
+static void pool_fini(struct pagevec *pv)
+{
+	pagevec_release(pv);
+}
+
+static int pool_refill(struct pagevec *pv, gfp_t gfp)
+{
+	while (pagevec_space(pv)) {
+		struct page *p;
+
+		p = alloc_page(gfp);
+		if (!p)
+			return -ENOMEM;
+
+		pagevec_add(pv, p);
+	}
+
+	return 0;
+}
+
+static int pool_init(struct pagevec *pv, gfp_t gfp)
+{
+	int err;
+
+	pagevec_init(pv);
+
+	err = pool_refill(pv, gfp);
+	if (err)
+		pool_fini(pv);
+
+	return err;
+}
+
+static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
+{
+	struct page *p;
+
+	p = alloc_page(gfp);
+	if (!p && pagevec_count(pv))
+		p = pv->pages[--pv->nr];
+
+	return p ? page_address(p) : NULL;
+}
+
+static void pool_free(struct pagevec *pv, void *addr)
+{
+	struct page *p = virt_to_page(addr);
+
+	if (pagevec_space(pv))
+		pagevec_add(pv, p);
+	else
+		__free_page(p);
+}
+
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR

 struct compress {
+	struct pagevec pool;
 	struct z_stream_s zstream;
 	void *tmp;
 };

 static bool compress_init(struct compress *c)
 {
-	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));
+	struct z_stream_s *zstream = &c->zstream;

-	zstream->workspace =
-		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
-			GFP_ATOMIC | __GFP_NOWARN);
-	if (!zstream->workspace)
+	if (pool_init(&c->pool, ALLOW_FAIL))
 		return false;

-	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
-		kfree(zstream->workspace);
+	zstream->workspace =
+		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
+			ALLOW_FAIL);
+	if (!zstream->workspace) {
+		pool_fini(&c->pool);
 		return false;
 	}

 	c->tmp = NULL;
 	if (i915_has_memcpy_from_wc())
-		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+		c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);

 	return true;
 }

-static void *compress_next_page(struct drm_i915_error_object *dst)
+static bool compress_start(struct compress *c)
+{
+	struct z_stream_s *zstream = &c->zstream;
+	void *workspace = zstream->workspace;
+
+	memset(zstream, 0, sizeof(*zstream));
+	zstream->workspace = workspace;
+
+	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
+}
+
+static void *compress_next_page(struct compress *c,
+				struct drm_i915_error_object *dst)
 {
-	unsigned long page;
+	void *page;

 	if (dst->page_count >= dst->num_pages)
 		return ERR_PTR(-ENOSPC);

-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	page = pool_alloc(&c->pool, ATOMIC_MAYFAIL);
 	if (!page)
 		return ERR_PTR(-ENOMEM);

-	return dst->pages[dst->page_count++] = (void *)page;
+	return dst->pages[dst->page_count++] = page;
 }

 static int compress_page(struct compress *c,
@@ -267,7 +318,7 @@ static int compress_page(struct compress *c,
 	do {
 		if (zstream->avail_out == 0) {
-			zstream->next_out = compress_next_page(dst);
+			zstream->next_out = compress_next_page(c, dst);
 			if (IS_ERR(zstream->next_out))
 				return PTR_ERR(zstream->next_out);
@@ -295,7 +346,7 @@ static int compress_flush(struct compress *c,
 	do {
 		switch (zlib_deflate(zstream, Z_FINISH)) {
 		case Z_OK: /* more space requested */
-			zstream->next_out = compress_next_page(dst);
+			zstream->next_out = compress_next_page(c, dst);
 			if (IS_ERR(zstream->next_out))
 				return PTR_ERR(zstream->next_out);
@@ -316,15 +367,17 @@ static int compress_flush(struct compress *c,
 	return 0;
 }

-static void compress_fini(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static void compress_finish(struct compress *c)
 {
-	struct z_stream_s *zstream = &c->zstream;
+	zlib_deflateEnd(&c->zstream);
+}

-	zlib_deflateEnd(zstream);
-	kfree(zstream->workspace);
+static void compress_fini(struct compress *c)
+{
+	kfree(c->zstream.workspace);

 	if (c->tmp)
-		free_page((unsigned long)c->tmp);
+		pool_free(&c->pool, c->tmp);
+
+	pool_fini(&c->pool);
 }

 static void err_compression_marker(struct drm_i915_error_state_buf *m)
@@ -335,9 +388,15 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 #else

 struct compress {
+	struct pagevec pool;
 };

 static bool compress_init(struct compress *c)
+{
+	return pool_init(&c->pool, ALLOW_FAIL) == 0;
+}
+
+static bool compress_start(struct compress *c)
 {
 	return true;
 }
@@ -346,14 +405,12 @@ static int compress_page(struct compress *c,
 		       void *src,
 		       struct drm_i915_error_object *dst)
 {
-	unsigned long page;
 	void *ptr;

-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
-	if (!page)
+	ptr = pool_alloc(&c->pool, ATOMIC_MAYFAIL);
+	if (!ptr)
 		return -ENOMEM;

-	ptr = (void *)page;
 	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
@@ -367,9 +424,13 @@ static int compress_flush(struct compress *c,
 	return 0;
 }

-static void compress_fini(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static void compress_finish(struct compress *c)
+{
+}
+
+static void compress_fini(struct compress *c)
 {
+	pool_fini(&c->pool);
 }

 static void err_compression_marker(struct drm_i915_error_state_buf *m)
@@ -379,36 +440,6 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)

 #endif

-static void print_error_buffers(struct drm_i915_error_state_buf *m,
-				const char *name,
-				struct drm_i915_error_buffer *err,
-				int count)
-{
-	err_printf(m, "%s [%d]:\n", name, count);
-
-	while (count--) {
-		err_printf(m, " %08x_%08x %8u %02x %02x",
-			   upper_32_bits(err->gtt_offset),
-			   lower_32_bits(err->gtt_offset),
-			   err->size,
-			   err->read_domains,
-			   err->write_domain);
-		err_puts(m, tiling_flag(err->tiling));
-		err_puts(m, dirty_flag(err->dirty));
-		err_puts(m, purgeable_flag(err->purgeable));
-		err_puts(m, err->userptr ? " userptr" : "");
-		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));
-
-		if (err->name)
-			err_printf(m, " (name: %d)", err->name);
-		if (err->fence_reg != I915_FENCE_REG_NONE)
-			err_printf(m, " (fence: %d)", err->fence_reg);
-
-		err_puts(m, "\n");
-		err++;
-	}
-}
-
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
 				 const struct drm_i915_error_engine *ee)
 {
@@ -734,33 +765,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			error_print_engine(m, &error->engine[i], error->epoch);
 	}

-	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
-		char buf[128];
-		int len, first = 1;
-
-		if (!error->active_vm[i])
-			break;
-
-		len = scnprintf(buf, sizeof(buf), "Active (");
-		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
-			if (error->engine[j].vm != error->active_vm[i])
-				continue;
-
-			len += scnprintf(buf + len, sizeof(buf), "%s%s",
-					 first ? "" : ", ",
-					 m->i915->engine[j]->name);
-			first = 0;
-		}
-		scnprintf(buf + len, sizeof(buf), ")");
-		print_error_buffers(m, buf,
-				    error->active_bo[i],
-				    error->active_bo_count[i]);
-	}
-
-	print_error_buffers(m, "Pinned (global)",
-			    error->pinned_bo,
-			    error->pinned_bo_count);
-
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		const struct drm_i915_error_engine *ee = &error->engine[i];
@@ -974,10 +978,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
 		kfree(ee->requests);
 	}

-	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
-		kfree(error->active_bo[i]);
-	kfree(error->pinned_bo);
-
 	kfree(error->overlay);
 	kfree(error->display);
@@ -990,12 +990,12 @@ void __i915_gpu_state_free(struct kref *error_ref)

 static struct drm_i915_error_object *
 i915_error_object_create(struct drm_i915_private *i915,
-			 struct i915_vma *vma)
+			 struct i915_vma *vma,
+			 struct compress *compress)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
-	struct compress compress;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	dma_addr_t dma;
@@ -1006,22 +1006,21 @@ i915_error_object_create(struct drm_i915_private *i915,
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
 	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */

-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
-		      GFP_ATOMIC | __GFP_NOWARN);
+	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ATOMIC_MAYFAIL);
 	if (!dst)
 		return NULL;

+	if (!compress_start(compress)) {
+		kfree(dst);
+		return NULL;
+	}
+
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->num_pages = num_pages;
 	dst->page_count = 0;
 	dst->unused = 0;

-	if (!compress_init(&compress)) {
-		kfree(dst);
-		return NULL;
-	}
-
 	ret = -EINVAL;
 	for_each_sgt_dma(dma, iter, vma->pages) {
 		void __iomem *s;
@@ -1029,69 +1028,23 @@ i915_error_object_create(struct drm_i915_private *i915,
 		ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);

 		s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
-		ret = compress_page(&compress, (void __force *)s, dst);
+		ret = compress_page(compress, (void __force *)s, dst);
 		io_mapping_unmap_atomic(s);
 		if (ret)
 			break;
 	}

-	if (ret || compress_flush(&compress, dst)) {
+	if (ret || compress_flush(compress, dst)) {
 		while (dst->page_count--)
-			free_page((unsigned long)dst->pages[dst->page_count]);
+			pool_free(&compress->pool, dst->pages[dst->page_count]);
 		kfree(dst);
 		dst = NULL;
 	}

-	compress_fini(&compress, dst);
+	compress_finish(compress);
 	return dst;
 }

-static void capture_bo(struct drm_i915_error_buffer *err,
-		       struct i915_vma *vma)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-
-	err->size = obj->base.size;
-	err->name = obj->base.name;
-	err->gtt_offset = vma->node.start;
-	err->read_domains = obj->read_domains;
-	err->write_domain = obj->write_domain;
-	err->fence_reg = vma->fence ? vma->fence->id : -1;
-	err->tiling = i915_gem_object_get_tiling(obj);
-	err->dirty = obj->mm.dirty;
-	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
-	err->userptr = obj->userptr.mm != NULL;
-	err->cache_level = obj->cache_level;
-}
-
-static u32 capture_error_bo(struct drm_i915_error_buffer *err,
-			    int count, struct list_head *head,
-			    unsigned int flags)
-#define ACTIVE_ONLY BIT(0)
-#define PINNED_ONLY BIT(1)
-{
-	struct i915_vma *vma;
-	int i = 0;
-
-	list_for_each_entry(vma, head, vm_link) {
-		if (!vma->obj)
-			continue;
-
-		if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma))
-			continue;
-
-		if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))
-			continue;
-
-		capture_bo(err++, vma);
-		if (++i == count)
-			break;
-	}
-
-	return i;
-}
-
 /*
  * Generate a semi-unique error code. The code is not meant to have meaning, The
  * code's only purpose is to try to prevent false duplicated bug reports by
@@ -1281,7 +1234,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	if (!count)
 		return;

-	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
+	ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
 	if (!ee->requests)
 		return;
@@ -1349,8 +1302,10 @@ static void record_context(struct drm_i915_error_context *e,
 	e->active = atomic_read(&ctx->active_count);
 }

-static void request_record_user_bo(struct i915_request *request,
-				   struct drm_i915_error_engine *ee)
+static void
+request_record_user_bo(struct i915_request *request,
+		       struct drm_i915_error_engine *ee,
+		       struct compress *compress)
 {
 	struct i915_capture_list *c;
 	struct drm_i915_error_object **bo;
@@ -1362,18 +1317,20 @@ static void request_record_user_bo(struct i915_request *request,
 	if (!max)
 		return;

-	bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+	bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
 	if (!bo) {
 		/* If we can't capture everything, try to capture something. */
 		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
-		bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+		bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
 	}
 	if (!bo)
 		return;

 	count = 0;
 	for (c = request->capture_list; c; c = c->next) {
-		bo[count] = i915_error_object_create(request->i915, c->vma);
+		bo[count] = i915_error_object_create(request->i915,
+						     c->vma,
+						     compress);
 		if (!bo[count])
 			break;
 		if (++count == max)
@@ -1386,7 +1343,8 @@ static void request_record_user_bo(struct i915_request *request,

 static struct drm_i915_error_object *
 capture_object(struct drm_i915_private *dev_priv,
-	       struct drm_i915_gem_object *obj)
+	       struct drm_i915_gem_object *obj,
+	       struct compress *compress)
 {
 	if (obj && i915_gem_object_has_pages(obj)) {
 		struct i915_vma fake = {
@@ -1396,13 +1354,14 @@ capture_object(struct drm_i915_private *dev_priv,
 			.obj = obj,
 		};

-		return i915_error_object_create(dev_priv, &fake);
+		return i915_error_object_create(dev_priv, &fake, compress);
 	} else {
 		return NULL;
 	}
 }

-static void gem_record_rings(struct i915_gpu_state *error)
+static void
+gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 {
 	struct drm_i915_private *i915 = error->i915;
 	int i;
@@ -1420,6 +1379,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
 		ee->engine_id = i;

+		/* Refill our page pool before entering atomic section */
+		pool_refill(&compress->pool, ALLOW_FAIL);
+
 		error_record_engine_registers(error, engine, ee);
 		error_record_engine_execlists(engine, ee);
@@ -1429,8 +1391,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			struct i915_gem_context *ctx = request->gem_context;
 			struct intel_ring *ring = request->ring;

-			ee->vm = ctx->vm ?: &engine->gt->ggtt->vm;
-
 			record_context(&ee->context, ctx);

 			/* We need to copy these to an anonymous buffer
@@ -1438,17 +1398,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			 * by userspace.
 			 */
 			ee->batchbuffer =
-				i915_error_object_create(i915, request->batch);
+				i915_error_object_create(i915,
+							 request->batch,
+							 compress);

 			if (HAS_BROKEN_CS_TLB(i915))
 				ee->wa_batchbuffer =
 					i915_error_object_create(i915,
-								 engine->gt->scratch);
-			request_record_user_bo(request, ee);
+								 engine->gt->scratch,
+								 compress);
+			request_record_user_bo(request, ee, compress);

 			ee->ctx =
 				i915_error_object_create(i915,
-							 request->hw_context->state);
+							 request->hw_context->state,
+							 compress);

 			error->simulated |=
 				i915_gem_context_no_error_capture(ctx);
@@ -1460,7 +1424,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			ee->cpu_ring_head = ring->head;
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
-				i915_error_object_create(i915, ring->vma);
+				i915_error_object_create(i915,
+							 ring->vma,
+							 compress);

 			engine_record_requests(engine, request, ee);
 		}
@@ -1468,89 +1434,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
 		ee->hws_page =
 			i915_error_object_create(i915,
-						 engine->status_page.vma);
+						 engine->status_page.vma,
+						 compress);

-		ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);
+		ee->wa_ctx =
+			i915_error_object_create(i915,
+						 engine->wa_ctx.vma,
+						 compress);

-		ee->default_state = capture_object(i915, engine->default_state);
+		ee->default_state =
+			capture_object(i915, engine->default_state, compress);
 	}
 }

-static void gem_capture_vm(struct i915_gpu_state *error,
-			   struct i915_address_space *vm,
-			   int idx)
-{
-	struct drm_i915_error_buffer *active_bo;
-	struct i915_vma *vma;
-	int count;
-
-	count = 0;
-	list_for_each_entry(vma, &vm->bound_list, vm_link)
-		if (i915_vma_is_active(vma))
-			count++;
-
-	active_bo = NULL;
-	if (count)
-		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
-	if (active_bo)
-		count = capture_error_bo(active_bo,
-					 count, &vm->bound_list,
-					 ACTIVE_ONLY);
-	else
-		count = 0;
-
-	error->active_vm[idx] = vm;
-	error->active_bo[idx] = active_bo;
-	error->active_bo_count[idx] = count;
-}
-
-static void capture_active_buffers(struct i915_gpu_state *error)
-{
-	int cnt = 0, i, j;
-
-	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
-	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
-	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
-
-	/* Scan each engine looking for unique active contexts/vm */
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		struct drm_i915_error_engine *ee = &error->engine[i];
-		bool found;
-
-		if (!ee->vm)
-			continue;
-
-		found = false;
-		for (j = 0; j < i && !found; j++)
-			found = error->engine[j].vm == ee->vm;
-		if (!found)
-			gem_capture_vm(error, ee->vm, cnt++);
-	}
-}
-
-static void capture_pinned_buffers(struct i915_gpu_state *error)
-{
-	struct i915_address_space *vm = &error->i915->ggtt.vm;
-	struct drm_i915_error_buffer *bo;
-	struct i915_vma *vma;
-	int count;
-
-	count = 0;
-	list_for_each_entry(vma, &vm->bound_list, vm_link)
-		count++;
-
-	bo = NULL;
-	if (count)
-		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
-	if (!bo)
-		return;
-
-	error->pinned_bo_count =
-		capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);
-	error->pinned_bo = bo;
-}
-
-static void capture_uc_state(struct i915_gpu_state *error)
+static void
+capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
 {
 	struct drm_i915_private *i915 = error->i915;
 	struct i915_error_uc *error_uc = &error->uc;
@@ -1567,9 +1465,11 @@ static void capture_uc_state(struct i915_gpu_state *error)
 	 * As modparams are generally accesible from the userspace make
 	 * explicit copies of the firmware paths.
 	 */
-	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, GFP_ATOMIC);
-	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, GFP_ATOMIC);
-	error_uc->guc_log = i915_error_object_create(i915, uc->guc.log.vma);
+	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
+	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
+	error_uc->guc_log = i915_error_object_create(i915,
+						     uc->guc.log.vma,
+						     compress);
 }

 /* Capture all registers which don't fit into another category. */
@@ -1753,56 +1653,53 @@ static void capture_finish(struct i915_gpu_state *error)
 	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
 }

-static int capture(void *data)
-{
-	struct i915_gpu_state *error = data;
-
-	error->time = ktime_get_real();
-	error->boottime = ktime_get_boottime();
-	error->uptime = ktime_sub(ktime_get(),
-				  error->i915->gt.last_init_time);
-	error->capture = jiffies;
-
-	capture_params(error);
-	capture_gen_state(error);
-	capture_uc_state(error);
-	capture_reg_state(error);
-	gem_record_fences(error);
-	gem_record_rings(error);
-	capture_active_buffers(error);
-	capture_pinned_buffers(error);
-
-	error->overlay = intel_overlay_capture_error_state(error->i915);
-	error->display = intel_display_capture_error_state(error->i915);
-
-	error->epoch = capture_find_epoch(error);
-
-	capture_finish(error);
-	return 0;
-}
-
 #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))

 struct i915_gpu_state *
 i915_capture_gpu_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_state *error;
+	struct compress compress;

 	/* Check if GPU capture has been disabled */
 	error = READ_ONCE(i915->gpu_error.first_error);
 	if (IS_ERR(error))
 		return error;

-	error = kzalloc(sizeof(*error), GFP_ATOMIC);
+	error = kzalloc(sizeof(*error), ALLOW_FAIL);
 	if (!error) {
 		i915_disable_error_state(i915, -ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}

+	if (!compress_init(&compress)) {
+		kfree(error);
+		i915_disable_error_state(i915, -ENOMEM);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	kref_init(&error->ref);
 	error->i915 = i915;

-	stop_machine(capture, error, NULL);
+	error->time = ktime_get_real();
+	error->boottime = ktime_get_boottime();
+	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+	error->capture = jiffies;
+
+	capture_params(error);
+	capture_gen_state(error);
+	capture_uc_state(error, &compress);
+	capture_reg_state(error);
+	gem_record_fences(error);
+	gem_record_rings(error, &compress);
+
+	error->overlay = intel_overlay_capture_error_state(i915);
+	error->display = intel_display_capture_error_state(i915);
+
+	error->epoch = capture_find_epoch(error);
+
+	capture_finish(error);
+	compress_fini(&compress);

 	return error;
 }
...
@@ -85,7 +85,6 @@ struct i915_gpu_state {
 		/* Software tracked state */
 		bool idle;
 		unsigned long hangcheck_timestamp;
-		struct i915_address_space *vm;
 		int num_requests;
 		u32 reset_count;
@@ -161,22 +160,6 @@ struct i915_gpu_state {
 		} vm_info;
 	} engine[I915_NUM_ENGINES];

-	struct drm_i915_error_buffer {
-		u32 size;
-		u32 name;
-		u64 gtt_offset;
-		u32 read_domains;
-		u32 write_domain;
-		s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
-		u32 tiling:2;
-		u32 dirty:1;
-		u32 purgeable:1;
-		u32 userptr:1;
-		u32 cache_level:3;
-	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
-	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
-	struct i915_address_space *active_vm[I915_NUM_ENGINES];
-
 	struct scatterlist *sgl, *fit;
 };
...