Commit 975ab7b3 authored by Oded Gabbay

habanalabs: count dropped CS because max CS in-flight

There is a case where the user reaches the maximum number of CS in-flight.
In that case, the driver rejects the new CS of the user with EAGAIN. Count
that event so the user can query the driver later to see if it happened.
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 0db57535
...@@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) ...@@ -252,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
ctx->cs_counters.parsing_drop_cnt; ctx->cs_counters.parsing_drop_cnt;
hdev->aggregated_cs_counters.queue_full_drop_cnt += hdev->aggregated_cs_counters.queue_full_drop_cnt +=
ctx->cs_counters.queue_full_drop_cnt; ctx->cs_counters.queue_full_drop_cnt;
hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
ctx->cs_counters.max_cs_in_flight_drop_cnt;
} }
static void cs_do_release(struct kref *ref) static void cs_do_release(struct kref *ref)
...@@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, ...@@ -431,8 +433,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
(hdev->asic_prop.max_pending_cs - 1)]; (hdev->asic_prop.max_pending_cs - 1)];
if (other && !completion_done(&other->completion)) { if (other && !completion_done(&other->completion)) {
dev_dbg(hdev->dev, dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n"); "Rejecting CS because of too many in-flights CS\n");
ctx->cs_counters.max_cs_in_flight_drop_cnt++;
rc = -EAGAIN; rc = -EAGAIN;
goto free_fence; goto free_fence;
} }
......
...@@ -401,12 +401,14 @@ struct hl_info_sync_manager { ...@@ -401,12 +401,14 @@ struct hl_info_sync_manager {
* @parsing_drop_cnt: dropped due to error in packet parsing * @parsing_drop_cnt: dropped due to error in packet parsing
* @queue_full_drop_cnt: dropped due to queue full * @queue_full_drop_cnt: dropped due to queue full
* @device_in_reset_drop_cnt: dropped due to device in reset * @device_in_reset_drop_cnt: dropped due to device in reset
* @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
*/ */
struct hl_cs_counters { struct hl_cs_counters {
__u64 out_of_mem_drop_cnt; __u64 out_of_mem_drop_cnt;
__u64 parsing_drop_cnt; __u64 parsing_drop_cnt;
__u64 queue_full_drop_cnt; __u64 queue_full_drop_cnt;
__u64 device_in_reset_drop_cnt; __u64 device_in_reset_drop_cnt;
__u64 max_cs_in_flight_drop_cnt;
}; };
struct hl_info_cs_counters { struct hl_info_cs_counters {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment