Commit 21e7a346 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay

habanalabs: sync stream generic functionality

Currently, sync stream is limited to external queues only. We want to
remove this constraint by adding a new queue property dedicated to sync
stream. In addition, we move the initialization and reset methods to the
common code, since we can re-use them there with only slight changes.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent c16d45f4
...@@ -740,6 +740,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, ...@@ -740,6 +740,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_cs_job *job; struct hl_cs_job *job;
struct hl_cs *cs; struct hl_cs *cs;
struct hl_cb *cb; struct hl_cb *cb;
enum hl_queue_type q_type;
u64 *signal_seq_arr = NULL, signal_seq; u64 *signal_seq_arr = NULL, signal_seq;
u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
int rc; int rc;
...@@ -772,9 +773,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, ...@@ -772,9 +773,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
chunk = &cs_chunk_array[0]; chunk = &cs_chunk_array[0];
q_idx = chunk->queue_index; q_idx = chunk->queue_index;
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
q_type = hw_queue_prop->type;
if ((q_idx >= HL_MAX_QUEUES) || if ((q_idx >= HL_MAX_QUEUES) ||
(hw_queue_prop->type != QUEUE_TYPE_EXT)) { (!hw_queue_prop->supports_sync_stream)) {
dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
rc = -EINVAL; rc = -EINVAL;
goto free_cs_chunk_array; goto free_cs_chunk_array;
...@@ -871,7 +873,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, ...@@ -871,7 +873,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
*cs_seq = cs->sequence; *cs_seq = cs->sequence;
job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); job = hl_cs_allocate_job(hdev, q_type, true);
if (!job) { if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM; rc = -ENOMEM;
......
...@@ -345,10 +345,12 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) ...@@ -345,10 +345,12 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].driver_only = 0; prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].requires_kernel_cb = 1; prop->hw_queues_props[i].requires_kernel_cb = 1;
prop->hw_queues_props[i].supports_sync_stream = 1;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
prop->hw_queues_props[i].driver_only = 1; prop->hw_queues_props[i].driver_only = 1;
prop->hw_queues_props[i].requires_kernel_cb = 0; prop->hw_queues_props[i].requires_kernel_cb = 0;
prop->hw_queues_props[i].supports_sync_stream = 0;
} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT; prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
prop->hw_queues_props[i].driver_only = 0; prop->hw_queues_props[i].driver_only = 0;
...@@ -357,6 +359,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) ...@@ -357,6 +359,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].type = QUEUE_TYPE_NA; prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
prop->hw_queues_props[i].driver_only = 0; prop->hw_queues_props[i].driver_only = 0;
prop->hw_queues_props[i].requires_kernel_cb = 0; prop->hw_queues_props[i].requires_kernel_cb = 0;
prop->hw_queues_props[i].supports_sync_stream = 0;
} }
} }
...@@ -364,7 +367,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) ...@@ -364,7 +367,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
prop->hw_queues_props[i].type = QUEUE_TYPE_NA; prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->sync_stream_first_sob = 0;
prop->sync_stream_first_mon = 0;
prop->dram_base_address = DRAM_PHYS_BASE; prop->dram_base_address = DRAM_PHYS_BASE;
prop->dram_size = GAUDI_HBM_SIZE_32GB; prop->dram_size = GAUDI_HBM_SIZE_32GB;
prop->dram_end_address = prop->dram_base_address + prop->dram_end_address = prop->dram_base_address +
...@@ -6296,44 +6300,6 @@ static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) ...@@ -6296,44 +6300,6 @@ static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
return gaudi_cq_assignment[cq_idx]; return gaudi_cq_assignment[cq_idx];
} }
/*
 * gaudi_ext_queue_init - set up the sync-stream SOB/MON state of an
 * external queue.
 * @hdev: habanalabs device structure.
 * @q_idx: index of the H/W queue to initialize.
 *
 * Assigns the queue its reserved slice of sync objects and monitors and
 * resets the per-queue sync-stream bookkeeping.
 */
static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct gaudi_device *gaudi_dev = hdev->asic_specific;
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];
	int i, idx;

	/*
	 * External queues are not necessarily contiguous, so derive the
	 * SOB/MON base IDs from a running external-queue counter rather
	 * than from the H/W queue index itself.
	 */
	idx = gaudi_dev->ext_queue_idx++;

	q->base_sob_id = idx * HL_RSVD_SOBS;
	q->base_mon_id = idx * HL_RSVD_MONS;
	q->next_sob_val = 1;
	q->curr_sob_offset = 0;

	for (i = 0 ; i < HL_RSVD_SOBS ; i++) {
		struct hl_hw_sob *sob = &q->hw_sob[i];

		sob->hdev = hdev;
		sob->sob_id = q->base_sob_id + i;
		sob->q_idx = q_idx;
		kref_init(&sob->kref);
	}
}
/*
 * gaudi_ext_queue_reset - bring an external queue's sync-stream state
 * back to its initial values.
 * @hdev: habanalabs device structure.
 * @q_idx: index of the H/W queue to reset.
 */
static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];

	/*
	 * A stuck CS may have left the currently used SOB with a refcount
	 * greater than 1, so force it back to 1 before reusing the queue.
	 */
	kref_init(&q->hw_sob[q->curr_sob_offset].kref);

	q->curr_sob_offset = 0;
	q->next_sob_val = 1;
}
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{ {
return sizeof(struct packet_msg_short) + return sizeof(struct packet_msg_short) +
...@@ -6636,8 +6602,6 @@ static const struct hl_asic_funcs gaudi_funcs = { ...@@ -6636,8 +6602,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
.read_device_fw_version = gaudi_read_device_fw_version, .read_device_fw_version = gaudi_read_device_fw_version,
.load_firmware_to_device = gaudi_load_firmware_to_device, .load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device, .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
.ext_queue_init = gaudi_ext_queue_init,
.ext_queue_reset = gaudi_ext_queue_reset,
.get_signal_cb_size = gaudi_get_signal_cb_size, .get_signal_cb_size = gaudi_get_signal_cb_size,
.get_wait_cb_size = gaudi_get_wait_cb_size, .get_wait_cb_size = gaudi_get_wait_cb_size,
.gen_signal_cb = gaudi_gen_signal_cb, .gen_signal_cb = gaudi_gen_signal_cb,
......
...@@ -234,7 +234,6 @@ struct gaudi_internal_qman_info { ...@@ -234,7 +234,6 @@ struct gaudi_internal_qman_info {
* engine. * engine.
* @multi_msi_mode: whether we are working in multi MSI single MSI mode. * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
* Multi MSI is possible only with IOMMU enabled. * Multi MSI is possible only with IOMMU enabled.
* @ext_queue_idx: helper index for external queues initialization.
* @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
* 8-bit value so use u8. * 8-bit value so use u8.
*/ */
...@@ -255,7 +254,6 @@ struct gaudi_device { ...@@ -255,7 +254,6 @@ struct gaudi_device {
u32 events_stat_aggregate[GAUDI_EVENT_SIZE]; u32 events_stat_aggregate[GAUDI_EVENT_SIZE];
u32 hw_cap_initialized; u32 hw_cap_initialized;
u8 multi_msi_mode; u8 multi_msi_mode;
u8 ext_queue_idx;
u8 mmu_cache_inv_pi; u8 mmu_cache_inv_pi;
}; };
......
...@@ -5156,16 +5156,6 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) ...@@ -5156,16 +5156,6 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
return cq_idx; return cq_idx;
} }
/* No-op: Goya keeps no per-queue sync-stream state to initialize. */
static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
{
}
/* No-op: Goya keeps no per-queue sync-stream state to reset. */
static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
{
}
static u32 goya_get_signal_cb_size(struct hl_device *hdev) static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{ {
return 0; return 0;
...@@ -5279,8 +5269,6 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5279,8 +5269,6 @@ static const struct hl_asic_funcs goya_funcs = {
.read_device_fw_version = goya_read_device_fw_version, .read_device_fw_version = goya_read_device_fw_version,
.load_firmware_to_device = goya_load_firmware_to_device, .load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device,
.ext_queue_init = goya_ext_queue_init,
.ext_queue_reset = goya_ext_queue_reset,
.get_signal_cb_size = goya_get_signal_cb_size, .get_signal_cb_size = goya_get_signal_cb_size,
.get_wait_cb_size = goya_get_wait_cb_size, .get_wait_cb_size = goya_get_wait_cb_size,
.gen_signal_cb = goya_gen_signal_cb, .gen_signal_cb = goya_gen_signal_cb,
......
...@@ -50,6 +50,10 @@ ...@@ -50,6 +50,10 @@
/* MMU */ /* MMU */
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
/*
* HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
* HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
*/
#define HL_RSVD_SOBS 4 #define HL_RSVD_SOBS 4
#define HL_RSVD_MONS 2 #define HL_RSVD_MONS 2
...@@ -141,11 +145,13 @@ struct hl_hw_sob { ...@@ -141,11 +145,13 @@ struct hl_hw_sob {
* false otherwise. * false otherwise.
* @requires_kernel_cb: true if a CB handle must be provided for jobs on this * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
* queue, false otherwise (a CB address must be provided). * queue, false otherwise (a CB address must be provided).
* @supports_sync_stream: True if queue supports sync stream
*/ */
struct hw_queue_properties { struct hw_queue_properties {
enum hl_queue_type type; enum hl_queue_type type;
u8 driver_only; u8 driver_only;
u8 requires_kernel_cb; u8 requires_kernel_cb;
u8 supports_sync_stream;
}; };
/** /**
...@@ -245,6 +251,9 @@ struct hl_mmu_properties { ...@@ -245,6 +251,9 @@ struct hl_mmu_properties {
* @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool.
* @tpc_enabled_mask: which TPCs are enabled. * @tpc_enabled_mask: which TPCs are enabled.
* @sync_stream_first_sob: first sync object available for sync stream use
* @sync_stream_first_mon: first monitor available for sync stream use
* @completion_queues_count: number of completion queues. * @completion_queues_count: number of completion queues.
*/ */
struct asic_fixed_properties { struct asic_fixed_properties {
...@@ -286,6 +295,8 @@ struct asic_fixed_properties { ...@@ -286,6 +295,8 @@ struct asic_fixed_properties {
u32 cb_pool_cb_cnt; u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size; u32 cb_pool_cb_size;
u32 max_pending_cs; u32 max_pending_cs;
u16 sync_stream_first_sob;
u16 sync_stream_first_mon;
u8 tpc_enabled_mask; u8 tpc_enabled_mask;
u8 completion_queues_count; u8 completion_queues_count;
}; };
...@@ -423,6 +434,7 @@ struct hl_cs_job; ...@@ -423,6 +434,7 @@ struct hl_cs_job;
* exist). * exist).
* @curr_sob_offset: the id offset to the currently used SOB from the * @curr_sob_offset: the id offset to the currently used SOB from the
* HL_RSVD_SOBS that are being used by this queue. * HL_RSVD_SOBS that are being used by this queue.
* @supports_sync_stream: True if queue supports sync stream
*/ */
struct hl_hw_queue { struct hl_hw_queue {
struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
...@@ -441,6 +453,7 @@ struct hl_hw_queue { ...@@ -441,6 +453,7 @@ struct hl_hw_queue {
u16 base_mon_id; u16 base_mon_id;
u8 valid; u8 valid;
u8 curr_sob_offset; u8 curr_sob_offset;
u8 supports_sync_stream;
}; };
/** /**
...@@ -603,8 +616,6 @@ enum hl_pll_frequency { ...@@ -603,8 +616,6 @@ enum hl_pll_frequency {
* contained in registers * contained in registers
* @load_firmware_to_device: load the firmware to the device's memory * @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory * @load_boot_fit_to_device: load boot fit to device's memory
* @ext_queue_init: Initialize the given external queue.
* @ext_queue_reset: Reset the given external queue.
* @get_signal_cb_size: Get signal CB size. * @get_signal_cb_size: Get signal CB size.
* @get_wait_cb_size: Get wait CB size. * @get_wait_cb_size: Get wait CB size.
* @gen_signal_cb: Generate a signal CB. * @gen_signal_cb: Generate a signal CB.
...@@ -707,8 +718,6 @@ struct hl_asic_funcs { ...@@ -707,8 +718,6 @@ struct hl_asic_funcs {
enum hl_fw_component fwc); enum hl_fw_component fwc);
int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev);
void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev);
void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
...@@ -1436,6 +1445,7 @@ struct hl_device_idle_busy_ts { ...@@ -1436,6 +1445,7 @@ struct hl_device_idle_busy_ts {
* @cdev_sysfs_created: were char devices and sysfs nodes created. * @cdev_sysfs_created: were char devices and sysfs nodes created.
* @stop_on_err: true if engines should stop on error. * @stop_on_err: true if engines should stop on error.
* @supports_sync_stream: is sync stream supported. * @supports_sync_stream: is sync stream supported.
* @sync_stream_queue_idx: helper index for sync stream queues initialization.
* @supports_coresight: is CoreSight supported. * @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported. * @supports_soft_reset: is soft reset supported.
*/ */
...@@ -1523,6 +1533,7 @@ struct hl_device { ...@@ -1523,6 +1533,7 @@ struct hl_device {
u8 cdev_sysfs_created; u8 cdev_sysfs_created;
u8 stop_on_err; u8 stop_on_err;
u8 supports_sync_stream; u8 supports_sync_stream;
u8 sync_stream_queue_idx;
u8 supports_coresight; u8 supports_coresight;
u8 supports_soft_reset; u8 supports_soft_reset;
......
...@@ -663,9 +663,6 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, ...@@ -663,9 +663,6 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
q->ci = 0; q->ci = 0;
q->pi = 0; q->pi = 0;
if (!is_cpu_queue)
hdev->asic_funcs->ext_queue_init(hdev, q->hw_queue_id);
return 0; return 0;
free_queue: free_queue:
...@@ -732,6 +729,42 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) ...@@ -732,6 +729,42 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
return 0; return 0;
} }
/*
 * sync_stream_queue_init - set up the sync-stream SOB/MON state of a
 * queue that supports sync stream.
 * @hdev: habanalabs device structure.
 * @q_idx: index of the H/W queue to initialize.
 *
 * Carves the queue's reserved slice out of the ASIC's sync-stream SOB
 * and monitor ranges and resets the per-queue bookkeeping.
 */
static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];
	int i, idx;

	/*
	 * Sync-stream queues are not necessarily contiguous, so use a
	 * running counter to pick this queue's SOB/MON slice.
	 */
	idx = hdev->sync_stream_queue_idx++;

	q->base_sob_id = prop->sync_stream_first_sob + idx * HL_RSVD_SOBS;
	q->base_mon_id = prop->sync_stream_first_mon + idx * HL_RSVD_MONS;
	q->next_sob_val = 1;
	q->curr_sob_offset = 0;

	for (i = 0 ; i < HL_RSVD_SOBS ; i++) {
		struct hl_hw_sob *sob = &q->hw_sob[i];

		sob->hdev = hdev;
		sob->sob_id = q->base_sob_id + i;
		sob->q_idx = q_idx;
		kref_init(&sob->kref);
	}
}
/*
 * sync_stream_queue_reset - bring a queue's sync-stream state back to
 * its initial values.
 * @hdev: habanalabs device structure.
 * @q_idx: index of the H/W queue to reset.
 */
static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[q_idx];

	/*
	 * A stuck CS may have left the currently used SOB with a refcount
	 * greater than 1, so force it back to 1 before reusing the queue.
	 */
	kref_init(&q->hw_sob[q->curr_sob_offset].kref);

	q->curr_sob_offset = 0;
	q->next_sob_val = 1;
}
/* /*
* queue_init - main initialization function for H/W queue object * queue_init - main initialization function for H/W queue object
* *
...@@ -774,6 +807,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, ...@@ -774,6 +807,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
break; break;
} }
if (q->supports_sync_stream)
sync_stream_queue_init(hdev, q->hw_queue_id);
if (rc) if (rc)
return rc; return rc;
...@@ -848,6 +884,8 @@ int hl_hw_queues_create(struct hl_device *hdev) ...@@ -848,6 +884,8 @@ int hl_hw_queues_create(struct hl_device *hdev)
i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) { i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
q->queue_type = asic->hw_queues_props[i].type; q->queue_type = asic->hw_queues_props[i].type;
q->supports_sync_stream =
asic->hw_queues_props[i].supports_sync_stream;
rc = queue_init(hdev, q, i); rc = queue_init(hdev, q, i);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
...@@ -889,7 +927,7 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset) ...@@ -889,7 +927,7 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
continue; continue;
q->pi = q->ci = 0; q->pi = q->ci = 0;
if (q->queue_type == QUEUE_TYPE_EXT) if (q->supports_sync_stream)
hdev->asic_funcs->ext_queue_reset(hdev, q->hw_queue_id); sync_stream_queue_reset(hdev, q->hw_queue_id);
} }
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment