Commit cffd7786 authored by Ashutosh Dixit's avatar Ashutosh Dixit

drm/xe/oa: Enable Xe2+ overrun mode

Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.

v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Acked-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: default avatarAshutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-18-ashutosh.dixit@intel.com
parent 3a1fc394
...@@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = { ...@@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = {
static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{ {
return (tail - head) & (XE_OA_BUFFER_SIZE - 1); return tail >= head ? tail - head :
tail + stream->oa_buffer.circ_size - head;
}
static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
{
return ptr + n >= stream->oa_buffer.circ_size ?
ptr + n - stream->oa_buffer.circ_size : ptr + n;
} }
static void xe_oa_config_release(struct kref *ref) static void xe_oa_config_release(struct kref *ref)
...@@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, ...@@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
buf += *offset; buf += *offset;
oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
report_size_partial = oa_buf_end - report; report_size_partial = oa_buf_end - report;
if (report_size_partial < report_size) { if (report_size_partial < report_size) {
...@@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, ...@@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
int report_size = stream->oa_buffer.format->size; int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr; u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
u32 mask = (XE_OA_BUFFER_SIZE - 1);
size_t start_offset = *offset; size_t start_offset = *offset;
unsigned long flags; unsigned long flags;
u32 head, tail; u32 head, tail;
...@@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, ...@@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
tail = stream->oa_buffer.tail; tail = stream->oa_buffer.tail;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); xe_assert(stream->oa->xe,
head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) { for (; xe_oa_circ_diff(stream, tail, head);
head = xe_oa_circ_incr(stream, head, report_size)) {
u8 *report = oa_buf_base + head; u8 *report = oa_buf_base + head;
ret = xe_oa_append_report(stream, buf, count, offset, report); ret = xe_oa_append_report(stream, buf, count, offset, report);
if (ret) if (ret)
break; break;
if (is_power_of_2(report_size)) { if (!(stream->oa_buffer.circ_size % report_size)) {
/* Clear out report id and timestamp to detect unlanded reports */ /* Clear out report id and timestamp to detect unlanded reports */
oa_report_id_clear(stream, (void *)report); oa_report_id_clear(stream, (void *)report);
oa_timestamp_clear(stream, (void *)report); oa_timestamp_clear(stream, (void *)report);
} else { } else {
u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
u32 part = oa_buf_end - report; u32 part = oa_buf_end - report;
/* Zero out the entire report */ /* Zero out the entire report */
...@@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) ...@@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK); gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0; stream->oa_buffer.head = 0;
/* /*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the * PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit". * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
...@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, ...@@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->periodic = param->period_exponent > 0; stream->periodic = param->period_exponent > 0;
stream->period_exponent = param->period_exponent; stream->period_exponent = param->period_exponent;
/*
* For Xe2+, when overrun mode is enabled, there are no partial reports at the end
* of buffer, making the OA buffer effectively a non-power-of-2 size circular
* buffer whose size, circ_size, is a multiple of the report size
*/
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
stream->oa_buffer.circ_size =
XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
else
stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
/* If we don't find the context offset, just return error */ /* If we don't find the context offset, just return error */
ret = xe_oa_set_ctx_ctrl_offset(stream); ret = xe_oa_set_ctx_ctrl_offset(stream);
......
...@@ -170,6 +170,9 @@ struct xe_oa_buffer { ...@@ -170,6 +170,9 @@ struct xe_oa_buffer {
/** @tail: The last verified cached tail where HW has completed writing */ /** @tail: The last verified cached tail where HW has completed writing */
u32 tail; u32 tail;
/** @circ_size: The effective circular buffer size, for Xe2+ */
u32 circ_size;
}; };
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment