Commit edc3d884 authored by Mika Kuoppala's avatar Mika Kuoppala Committed by Daniel Vetter

drm/i915: avoid big kmallocs on reading error state

Sometimes when user is trying to get error state out from
debugfs after gpu hang, the memory is low and/or fragmented
enough that kmalloc in seq_file will fail.

Prevent big kmalloc by avoiding seq_file and instead convert
error state to string in smaller chunks.

v2: better alloc flags, better truncate, correct
locking, and error handling improvements (Chris Wilson)

v3: printf annotations (Daniel Vetter)
Signed-off-by: default avatarMika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
parent 03973536
......@@ -604,15 +604,80 @@ static const char *purgeable_flag(int purgeable)
return purgeable ? " purgeable" : "";
}
static void print_error_buffers(struct seq_file *m,
static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
const char *f, va_list args)
{
unsigned len;
if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
e->err = -ENOSPC;
return;
}
if (e->bytes == e->size - 1 || e->err)
return;
/* Seek the first printf which is hits start position */
if (e->pos < e->start) {
len = vsnprintf(NULL, 0, f, args);
if (e->pos + len <= e->start) {
e->pos += len;
return;
}
/* First vsnprintf needs to fit in full for memmove*/
if (len >= e->size) {
e->err = -EIO;
return;
}
}
len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
if (len >= e->size - e->bytes)
len = e->size - e->bytes - 1;
/* If this is first printf in this window, adjust it so that
* start position matches start of the buffer
*/
if (e->pos < e->start) {
const size_t off = e->start - e->pos;
/* Should not happen but be paranoid */
if (off > len || e->bytes) {
e->err = -EIO;
return;
}
memmove(e->buf, e->buf + off, len - off);
e->bytes = len - off;
e->pos = e->start;
return;
}
e->bytes += len;
e->pos += len;
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
{
va_list args;
va_start(args, f);
i915_error_vprintf(e, f, args);
va_end(args);
}
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
static void print_error_buffers(struct drm_i915_error_state_buf *m,
const char *name,
struct drm_i915_error_buffer *err,
int count)
{
seq_printf(m, "%s [%d]:\n", name, count);
err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
seq_printf(m, " %08x %8u %02x %02x %x %x%s%s%s%s%s%s%s",
err_printf(m, " %08x %8u %02x %02x %x %x%s%s%s%s%s%s%s",
err->gtt_offset,
err->size,
err->read_domains,
......@@ -627,50 +692,50 @@ static void print_error_buffers(struct seq_file *m,
cache_level_str(err->cache_level));
if (err->name)
seq_printf(m, " (name: %d)", err->name);
err_printf(m, " (name: %d)", err->name);
if (err->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", err->fence_reg);
err_printf(m, " (fence: %d)", err->fence_reg);
seq_printf(m, "\n");
err_printf(m, "\n");
err++;
}
}
static void i915_ring_error_state(struct seq_file *m,
static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
struct drm_device *dev,
struct drm_i915_error_state *error,
unsigned ring)
{
BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
seq_printf(m, "%s command stream:\n", ring_str(ring));
seq_printf(m, " HEAD: 0x%08x\n", error->head[ring]);
seq_printf(m, " TAIL: 0x%08x\n", error->tail[ring]);
seq_printf(m, " CTL: 0x%08x\n", error->ctl[ring]);
seq_printf(m, " ACTHD: 0x%08x\n", error->acthd[ring]);
seq_printf(m, " IPEIR: 0x%08x\n", error->ipeir[ring]);
seq_printf(m, " IPEHR: 0x%08x\n", error->ipehr[ring]);
seq_printf(m, " INSTDONE: 0x%08x\n", error->instdone[ring]);
err_printf(m, "%s command stream:\n", ring_str(ring));
err_printf(m, " HEAD: 0x%08x\n", error->head[ring]);
err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]);
err_printf(m, " CTL: 0x%08x\n", error->ctl[ring]);
err_printf(m, " ACTHD: 0x%08x\n", error->acthd[ring]);
err_printf(m, " IPEIR: 0x%08x\n", error->ipeir[ring]);
err_printf(m, " IPEHR: 0x%08x\n", error->ipehr[ring]);
err_printf(m, " INSTDONE: 0x%08x\n", error->instdone[ring]);
if (ring == RCS && INTEL_INFO(dev)->gen >= 4)
seq_printf(m, " BBADDR: 0x%08llx\n", error->bbaddr);
err_printf(m, " BBADDR: 0x%08llx\n", error->bbaddr);
if (INTEL_INFO(dev)->gen >= 4)
seq_printf(m, " INSTPS: 0x%08x\n", error->instps[ring]);
seq_printf(m, " INSTPM: 0x%08x\n", error->instpm[ring]);
seq_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]);
err_printf(m, " INSTPS: 0x%08x\n", error->instps[ring]);
err_printf(m, " INSTPM: 0x%08x\n", error->instpm[ring]);
err_printf(m, " FADDR: 0x%08x\n", error->faddr[ring]);
if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m, " RC PSMI: 0x%08x\n", error->rc_psmi[ring]);
seq_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
seq_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n",
err_printf(m, " RC PSMI: 0x%08x\n", error->rc_psmi[ring]);
err_printf(m, " FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n",
error->semaphore_mboxes[ring][0],
error->semaphore_seqno[ring][0]);
seq_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n",
err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n",
error->semaphore_mboxes[ring][1],
error->semaphore_seqno[ring][1]);
}
seq_printf(m, " seqno: 0x%08x\n", error->seqno[ring]);
seq_printf(m, " waiting: %s\n", yesno(error->waiting[ring]));
seq_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
seq_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
err_printf(m, " seqno: 0x%08x\n", error->seqno[ring]);
err_printf(m, " waiting: %s\n", yesno(error->waiting[ring]));
err_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
err_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
}
struct i915_error_state_file_priv {
......@@ -678,9 +743,11 @@ struct i915_error_state_file_priv {
struct drm_i915_error_state *error;
};
static int i915_error_state(struct seq_file *m, void *unused)
static int i915_error_state(struct i915_error_state_file_priv *error_priv,
struct drm_i915_error_state_buf *m)
{
struct i915_error_state_file_priv *error_priv = m->private;
struct drm_device *dev = error_priv->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_error_state *error = error_priv->error;
......@@ -688,34 +755,35 @@ static int i915_error_state(struct seq_file *m, void *unused)
int i, j, page, offset, elt;
if (!error) {
seq_printf(m, "no error state collected\n");
err_printf(m, "no error state collected\n");
return 0;
}
seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
error->time.tv_usec);
seq_printf(m, "Kernel: " UTS_RELEASE "\n");
seq_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
seq_printf(m, "EIR: 0x%08x\n", error->eir);
seq_printf(m, "IER: 0x%08x\n", error->ier);
seq_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
seq_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
seq_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
seq_printf(m, "CCID: 0x%08x\n", error->ccid);
err_printf(m, "Kernel: " UTS_RELEASE "\n");
err_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
err_printf(m, "EIR: 0x%08x\n", error->eir);
err_printf(m, "IER: 0x%08x\n", error->ier);
err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
err_printf(m, "CCID: 0x%08x\n", error->ccid);
for (i = 0; i < dev_priv->num_fence_regs; i++)
seq_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
for (i = 0; i < ARRAY_SIZE(error->extra_instdone); i++)
seq_printf(m, " INSTDONE_%d: 0x%08x\n", i, error->extra_instdone[i]);
err_printf(m, " INSTDONE_%d: 0x%08x\n", i,
error->extra_instdone[i]);
if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m, "ERROR: 0x%08x\n", error->error);
seq_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
err_printf(m, "ERROR: 0x%08x\n", error->error);
err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
}
if (INTEL_INFO(dev)->gen == 7)
seq_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
for_each_ring(ring, dev_priv, i)
i915_ring_error_state(m, dev, error, i);
......@@ -734,24 +802,25 @@ static int i915_error_state(struct seq_file *m, void *unused)
struct drm_i915_error_object *obj;
if ((obj = error->ring[i].batchbuffer)) {
seq_printf(m, "%s --- gtt_offset = 0x%08x\n",
err_printf(m, "%s --- gtt_offset = 0x%08x\n",
dev_priv->ring[i].name,
obj->gtt_offset);
offset = 0;
for (page = 0; page < obj->page_count; page++) {
for (elt = 0; elt < PAGE_SIZE/4; elt++) {
seq_printf(m, "%08x : %08x\n", offset, obj->pages[page][elt]);
err_printf(m, "%08x : %08x\n", offset,
obj->pages[page][elt]);
offset += 4;
}
}
}
if (error->ring[i].num_requests) {
seq_printf(m, "%s --- %d requests\n",
err_printf(m, "%s --- %d requests\n",
dev_priv->ring[i].name,
error->ring[i].num_requests);
for (j = 0; j < error->ring[i].num_requests; j++) {
seq_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n",
err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n",
error->ring[i].requests[j].seqno,
error->ring[i].requests[j].jiffies,
error->ring[i].requests[j].tail);
......@@ -759,13 +828,13 @@ static int i915_error_state(struct seq_file *m, void *unused)
}
if ((obj = error->ring[i].ringbuffer)) {
seq_printf(m, "%s --- ringbuffer = 0x%08x\n",
err_printf(m, "%s --- ringbuffer = 0x%08x\n",
dev_priv->ring[i].name,
obj->gtt_offset);
offset = 0;
for (page = 0; page < obj->page_count; page++) {
for (elt = 0; elt < PAGE_SIZE/4; elt++) {
seq_printf(m, "%08x : %08x\n",
err_printf(m, "%08x : %08x\n",
offset,
obj->pages[page][elt]);
offset += 4;
......@@ -775,12 +844,12 @@ static int i915_error_state(struct seq_file *m, void *unused)
obj = error->ring[i].ctx;
if (obj) {
seq_printf(m, "%s --- HW Context = 0x%08x\n",
err_printf(m, "%s --- HW Context = 0x%08x\n",
dev_priv->ring[i].name,
obj->gtt_offset);
offset = 0;
for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
seq_printf(m, "[%04x] %08x %08x %08x %08x\n",
err_printf(m, "[%04x] %08x %08x %08x %08x\n",
offset,
obj->pages[0][elt],
obj->pages[0][elt+1],
......@@ -806,8 +875,7 @@ i915_error_state_write(struct file *filp,
size_t cnt,
loff_t *ppos)
{
struct seq_file *m = filp->private_data;
struct i915_error_state_file_priv *error_priv = m->private;
struct i915_error_state_file_priv *error_priv = filp->private_data;
struct drm_device *dev = error_priv->dev;
int ret;
......@@ -842,25 +910,81 @@ static int i915_error_state_open(struct inode *inode, struct file *file)
kref_get(&error_priv->error->ref);
spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
return single_open(file, i915_error_state, error_priv);
file->private_data = error_priv;
return 0;
}
static int i915_error_state_release(struct inode *inode, struct file *file)
{
struct seq_file *m = file->private_data;
struct i915_error_state_file_priv *error_priv = m->private;
struct i915_error_state_file_priv *error_priv = file->private_data;
if (error_priv->error)
kref_put(&error_priv->error->ref, i915_error_state_free);
kfree(error_priv);
return single_release(inode, file);
return 0;
}
static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
size_t count, loff_t *pos)
{
struct i915_error_state_file_priv *error_priv = file->private_data;
struct drm_i915_error_state_buf error_str;
loff_t tmp_pos = 0;
ssize_t ret_count = 0;
int ret = 0;
memset(&error_str, 0, sizeof(error_str));
/* We need to have enough room to store any i915_error_state printf
* so that we can move it to start position.
*/
error_str.size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
error_str.buf = kmalloc(error_str.size,
GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN);
if (error_str.buf == NULL) {
error_str.size = PAGE_SIZE;
error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY);
}
if (error_str.buf == NULL) {
error_str.size = 128;
error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY);
}
if (error_str.buf == NULL)
return -ENOMEM;
error_str.start = *pos;
ret = i915_error_state(error_priv, &error_str);
if (ret)
goto out;
if (error_str.bytes == 0 && error_str.err) {
ret = error_str.err;
goto out;
}
ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
error_str.buf,
error_str.bytes);
if (ret_count < 0)
ret = ret_count;
else
*pos = error_str.start + ret_count;
out:
kfree(error_str.buf);
return ret ?: ret_count;
}
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
.read = seq_read,
.read = i915_error_state_read,
.write = i915_error_state_write,
.llseek = default_llseek,
.release = i915_error_state_release,
......
......@@ -826,6 +826,15 @@ struct i915_gem_mm {
u32 object_count;
};
struct drm_i915_error_state_buf {
unsigned bytes;
unsigned size;
int err;
u8 *buf;
loff_t start;
loff_t pos;
};
struct i915_gpu_error {
/* For hangcheck timer */
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
......@@ -1818,6 +1827,8 @@ void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len,
/* i915_debugfs.c */
int i915_debugfs_init(struct drm_minor *minor);
void i915_debugfs_cleanup(struct drm_minor *minor);
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
/* i915_suspend.c */
extern int i915_save_state(struct drm_device *dev);
......@@ -1899,10 +1910,11 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
/* overlay */
#ifdef CONFIG_DEBUG_FS
extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
extern void intel_overlay_print_error_state(struct seq_file *m, struct intel_overlay_error_state *error);
extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
struct intel_overlay_error_state *error);
extern struct intel_display_error_state *intel_display_capture_error_state(struct drm_device *dev);
extern void intel_display_print_error_state(struct seq_file *m,
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
struct drm_device *dev,
struct intel_display_error_state *error);
#endif
......
......@@ -9858,48 +9858,50 @@ intel_display_capture_error_state(struct drm_device *dev)
return error;
}
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
void
intel_display_print_error_state(struct seq_file *m,
intel_display_print_error_state(struct drm_i915_error_state_buf *m,
struct drm_device *dev,
struct intel_display_error_state *error)
{
int i;
seq_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes);
err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes);
if (HAS_POWER_WELL(dev))
seq_printf(m, "PWR_WELL_CTL2: %08x\n",
err_printf(m, "PWR_WELL_CTL2: %08x\n",
error->power_well_driver);
for_each_pipe(i) {
seq_printf(m, "Pipe [%d]:\n", i);
seq_printf(m, " CPU transcoder: %c\n",
err_printf(m, "Pipe [%d]:\n", i);
err_printf(m, " CPU transcoder: %c\n",
transcoder_name(error->pipe[i].cpu_transcoder));
seq_printf(m, " CONF: %08x\n", error->pipe[i].conf);
seq_printf(m, " SRC: %08x\n", error->pipe[i].source);
seq_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal);
seq_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank);
seq_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync);
seq_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal);
seq_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank);
seq_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync);
seq_printf(m, "Plane [%d]:\n", i);
seq_printf(m, " CNTR: %08x\n", error->plane[i].control);
seq_printf(m, " STRIDE: %08x\n", error->plane[i].stride);
err_printf(m, " CONF: %08x\n", error->pipe[i].conf);
err_printf(m, " SRC: %08x\n", error->pipe[i].source);
err_printf(m, " HTOTAL: %08x\n", error->pipe[i].htotal);
err_printf(m, " HBLANK: %08x\n", error->pipe[i].hblank);
err_printf(m, " HSYNC: %08x\n", error->pipe[i].hsync);
err_printf(m, " VTOTAL: %08x\n", error->pipe[i].vtotal);
err_printf(m, " VBLANK: %08x\n", error->pipe[i].vblank);
err_printf(m, " VSYNC: %08x\n", error->pipe[i].vsync);
err_printf(m, "Plane [%d]:\n", i);
err_printf(m, " CNTR: %08x\n", error->plane[i].control);
err_printf(m, " STRIDE: %08x\n", error->plane[i].stride);
if (INTEL_INFO(dev)->gen <= 3) {
seq_printf(m, " SIZE: %08x\n", error->plane[i].size);
seq_printf(m, " POS: %08x\n", error->plane[i].pos);
err_printf(m, " SIZE: %08x\n", error->plane[i].size);
err_printf(m, " POS: %08x\n", error->plane[i].pos);
}
if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
seq_printf(m, " ADDR: %08x\n", error->plane[i].addr);
err_printf(m, " ADDR: %08x\n", error->plane[i].addr);
if (INTEL_INFO(dev)->gen >= 4) {
seq_printf(m, " SURF: %08x\n", error->plane[i].surface);
seq_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset);
err_printf(m, " SURF: %08x\n", error->plane[i].surface);
err_printf(m, " TILEOFF: %08x\n", error->plane[i].tile_offset);
}
seq_printf(m, "Cursor [%d]:\n", i);
seq_printf(m, " CNTR: %08x\n", error->cursor[i].control);
seq_printf(m, " POS: %08x\n", error->cursor[i].position);
seq_printf(m, " BASE: %08x\n", error->cursor[i].base);
err_printf(m, "Cursor [%d]:\n", i);
err_printf(m, " CNTR: %08x\n", error->cursor[i].control);
err_printf(m, " POS: %08x\n", error->cursor[i].position);
err_printf(m, " BASE: %08x\n", error->cursor[i].base);
}
}
#endif
......@@ -1485,14 +1485,15 @@ intel_overlay_capture_error_state(struct drm_device *dev)
}
void
intel_overlay_print_error_state(struct seq_file *m, struct intel_overlay_error_state *error)
intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
struct intel_overlay_error_state *error)
{
seq_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
error->dovsta, error->isr);
seq_printf(m, " Register file at 0x%08lx:\n",
error->base);
i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
error->dovsta, error->isr);
i915_error_printf(m, " Register file at 0x%08lx:\n",
error->base);
#define P(x) seq_printf(m, " " #x ": 0x%08x\n", error->regs.x)
#define P(x) i915_error_printf(m, " " #x ": 0x%08x\n", error->regs.x)
P(OBUF_0Y);
P(OBUF_1Y);
P(OBUF_0U);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment