Commit dd9efabd authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay

habanalabs: Increase queues depth

After recent concurrent cs amount increase, we must also
increase queues depth since much more concurrent work can be done.
All external queue depths were increased to 4096 as gaudi's
internal queue depths were also increased to 1024.
Signed-off-by: default avatarOfir Bitton <obitton@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent 917b79b0
...@@ -123,14 +123,14 @@ ...@@ -123,14 +123,14 @@
/* Internal QMANs PQ sizes */ /* Internal QMANs PQ sizes */
#define MME_QMAN_LENGTH 64 #define MME_QMAN_LENGTH 1024
#define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) #define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
#define HBM_DMA_QMAN_LENGTH 64 #define HBM_DMA_QMAN_LENGTH 1024
#define HBM_DMA_QMAN_SIZE_IN_BYTES \ #define HBM_DMA_QMAN_SIZE_IN_BYTES \
(HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) (HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
#define TPC_QMAN_LENGTH 64 #define TPC_QMAN_LENGTH 1024
#define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) #define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE)
#define SRAM_USER_BASE_OFFSET GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START #define SRAM_USER_BASE_OFFSET GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START
......
...@@ -378,38 +378,15 @@ struct hl_cb { ...@@ -378,38 +378,15 @@ struct hl_cb {
struct hl_cs_job; struct hl_cs_job;
/* /* Queue length of external and HW queues */
* Currently, there are two limitations on the maximum length of a queue: #define HL_QUEUE_LENGTH 4096
*
* 1. The memory footprint of the queue. The current allocated space for the
* queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
* the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
* which currently is 4096/16 = 256 entries.
*
* To increase that, we need either to decrease the size of the
* BD (difficult), or allocate more than a single page (easier).
*
* 2. Because the size of the JOB handle field in the BD CTL / completion queue
* is 10-bit, we can have up to 1024 open jobs per hardware queue.
* Therefore, each queue can hold up to 1024 entries.
*
* HL_QUEUE_LENGTH is in units of struct hl_bd.
* HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
*/
#define HL_PAGE_SIZE 4096 /* minimum page size */
/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
#define HL_QUEUE_LENGTH 256
#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE) #define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)
/* /* HL_CQ_LENGTH is in units of struct hl_cq_entry */
* HL_CQ_LENGTH is in units of struct hl_cq_entry.
* HL_CQ_LENGTH should be <= HL_PAGE_SIZE
*/
#define HL_CQ_LENGTH HL_QUEUE_LENGTH #define HL_CQ_LENGTH HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE) #define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */ /* Must be power of 2 */
#define HL_EQ_LENGTH 64 #define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) #define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
......
...@@ -780,8 +780,6 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, ...@@ -780,8 +780,6 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
{ {
int rc; int rc;
BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
q->hw_queue_id = hw_queue_id; q->hw_queue_id = hw_queue_id;
switch (q->queue_type) { switch (q->queue_type) {
......
...@@ -220,8 +220,6 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) ...@@ -220,8 +220,6 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
{ {
void *p; void *p;
BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO); &q->bus_address, GFP_KERNEL | __GFP_ZERO);
if (!p) if (!p)
...@@ -282,8 +280,6 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) ...@@ -282,8 +280,6 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{ {
void *p; void *p;
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
HL_EQ_SIZE_IN_BYTES, HL_EQ_SIZE_IN_BYTES,
&q->bus_address); &q->bus_address);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment