Commit 9494a8dd authored by Oded Gabbay's avatar Oded Gabbay Committed by Greg Kroah-Hartman

habanalabs: add h/w queues module

This patch adds the H/W queues module and the code to initialize Goya's
various compute and DMA engines and their queues.

Goya has 5 DMA channels, 8 TPC engines and a single MME engine. For each
channel/engine, there is a H/W queue logic which is used to pass commands
from the user to the H/W. That logic is called QMAN.

There are two types of QMANs: external and internal. The DMA QMANs are
considered external while the TPC and MME QMANs are considered internal.
For each external queue there is a completion queue, which is located on
the Host memory.

The differences between external and internal QMANs are:

1. The location of the queue's memory. External QMANs are located on the
   Host memory while internal QMANs are located on the on-chip memory.

2. The external QMAN writes an entry to a completion queue and sends an
   MSI-X interrupt upon completion of a command buffer that was given to
   it. The internal QMAN doesn't do that.
Reviewed-by: default avatarMike Rapoport <rppt@linux.ibm.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 839c4803
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
obj-m := habanalabs.o obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
command_buffer.o command_buffer.o hw_queue.o irq.o
include $(src)/goya/Makefile include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES) habanalabs-y += $(HL_GOYA_FILES)
...@@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev) ...@@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev)
if (rc) if (rc)
goto early_fini; goto early_fini;
hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
if (hdev->cq_wq == NULL) {
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
rc = -ENOMEM;
goto asid_fini;
}
hl_cb_mgr_init(&hdev->kernel_cb_mgr); hl_cb_mgr_init(&hdev->kernel_cb_mgr);
mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->fd_open_cnt_lock);
mutex_init(&hdev->send_cpu_message_lock);
atomic_set(&hdev->fd_open_cnt, 0); atomic_set(&hdev->fd_open_cnt, 0);
return 0; return 0;
asid_fini:
hl_asid_fini(hdev);
early_fini: early_fini:
if (hdev->asic_funcs->early_fini) if (hdev->asic_funcs->early_fini)
hdev->asic_funcs->early_fini(hdev); hdev->asic_funcs->early_fini(hdev);
...@@ -196,9 +206,12 @@ static int device_early_init(struct hl_device *hdev) ...@@ -196,9 +206,12 @@ static int device_early_init(struct hl_device *hdev)
*/ */
static void device_early_fini(struct hl_device *hdev) static void device_early_fini(struct hl_device *hdev)
{ {
mutex_destroy(&hdev->send_cpu_message_lock);
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
destroy_workqueue(hdev->cq_wq);
hl_asid_fini(hdev); hl_asid_fini(hdev);
if (hdev->asic_funcs->early_fini) if (hdev->asic_funcs->early_fini)
...@@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev) ...@@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev)
*/ */
int hl_device_init(struct hl_device *hdev, struct class *hclass) int hl_device_init(struct hl_device *hdev, struct class *hclass)
{ {
int rc; int i, rc, cq_ready_cnt;
/* Create device */ /* Create device */
rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops); rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
...@@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc) if (rc)
goto early_fini; goto early_fini;
/*
* Initialize the H/W queues. Must be done before hw_init, because
* there the addresses of the kernel queue are being written to the
* registers of the device
*/
rc = hl_hw_queues_create(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel queues\n");
goto sw_fini;
}
/*
* Initialize the completion queues. Must be done before hw_init,
* because there the addresses of the completion queues are being
* passed as arguments to request_irq
*/
hdev->completion_queue =
kcalloc(hdev->asic_prop.completion_queues_count,
sizeof(*hdev->completion_queue), GFP_KERNEL);
if (!hdev->completion_queue) {
dev_err(hdev->dev, "failed to allocate completion queues\n");
rc = -ENOMEM;
goto hw_queues_destroy;
}
for (i = 0, cq_ready_cnt = 0;
i < hdev->asic_prop.completion_queues_count;
i++, cq_ready_cnt++) {
rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
if (rc) {
dev_err(hdev->dev,
"failed to initialize completion queue\n");
goto cq_fini;
}
}
/* Allocate the kernel context */ /* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) { if (!hdev->kernel_ctx) {
rc = -ENOMEM; rc = -ENOMEM;
goto sw_fini; goto cq_fini;
} }
hdev->user_ctx = NULL; hdev->user_ctx = NULL;
...@@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->disabled = false; hdev->disabled = false;
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to detect if device is alive\n");
rc = 0;
goto out_disabled;
}
dev_notice(hdev->dev, dev_notice(hdev->dev,
"Successfully added device to habanalabs driver\n"); "Successfully added device to habanalabs driver\n");
...@@ -339,6 +397,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -339,6 +397,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
"kernel ctx is still alive on initialization failure\n"); "kernel ctx is still alive on initialization failure\n");
free_ctx: free_ctx:
kfree(hdev->kernel_ctx); kfree(hdev->kernel_ctx);
cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
hw_queues_destroy:
hl_hw_queues_destroy(hdev);
sw_fini: sw_fini:
hdev->asic_funcs->sw_fini(hdev); hdev->asic_funcs->sw_fini(hdev);
early_fini: early_fini:
...@@ -368,6 +432,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -368,6 +432,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
*/ */
void hl_device_fini(struct hl_device *hdev) void hl_device_fini(struct hl_device *hdev)
{ {
int i;
dev_info(hdev->dev, "Removing device\n"); dev_info(hdev->dev, "Removing device\n");
/* Mark device as disabled */ /* Mark device as disabled */
...@@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */ /* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true); hdev->asic_funcs->hw_fini(hdev, true);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
hl_hw_queues_destroy(hdev);
/* Call ASIC S/W finalize function */ /* Call ASIC S/W finalize function */
hdev->asic_funcs->sw_fini(hdev); hdev->asic_funcs->sw_fini(hdev);
......
This diff is collapsed.
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
#include <uapi/misc/habanalabs.h> #include <uapi/misc/habanalabs.h>
#include "habanalabs.h" #include "habanalabs.h"
#include "include/hl_boot_if.h" #include "include/hl_boot_if.h"
#include "include/goya/goya_packets.h"
#include "include/goya/goya.h" #include "include/goya/goya.h"
#include "include/goya/goya_async_events.h"
#include "include/goya/goya_fw_if.h" #include "include/goya/goya_fw_if.h"
#define NUMBER_OF_CMPLT_QUEUES 5 #define NUMBER_OF_CMPLT_QUEUES 5
...@@ -145,12 +147,17 @@ enum goya_fw_component { ...@@ -145,12 +147,17 @@ enum goya_fw_component {
}; };
struct goya_device { struct goya_device {
int (*test_cpu_queue)(struct hl_device *hdev);
/* TODO: remove hw_queues_lock after moving to scheduler code */ /* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock; spinlock_t hw_queues_lock;
u64 ddr_bar_cur_addr; u64 ddr_bar_cur_addr;
u32 hw_cap_initialized; u32 hw_cap_initialized;
}; };
int goya_test_cpu_queue(struct hl_device *hdev);
int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
u32 timeout, long *result);
void goya_init_security(struct hl_device *hdev); void goya_init_security(struct hl_device *hdev);
#endif /* GOYAP_H_ */ #endif /* GOYAP_H_ */
This diff is collapsed.
...@@ -169,6 +169,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -169,6 +169,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
/* Parameters for bring-up - set them to defaults */ /* Parameters for bring-up - set them to defaults */
hdev->cpu_enable = 1; hdev->cpu_enable = 1;
hdev->reset_pcilink = 0; hdev->reset_pcilink = 0;
hdev->cpu_queues_enable = 1;
hdev->fw_loading = 1; hdev->fw_loading = 1;
hdev->pldm = 0; hdev->pldm = 0;
...@@ -176,6 +177,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -176,6 +177,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
if (!hdev->cpu_enable) if (!hdev->cpu_enable)
hdev->fw_loading = 0; hdev->fw_loading = 0;
/* If we don't load FW, no need to initialize CPU queues */
if (!hdev->fw_loading)
hdev->cpu_queues_enable = 0;
hdev->disabled = true; hdev->disabled = true;
hdev->pdev = pdev; /* can be NULL in case of simulator device */ hdev->pdev = pdev; /* can be NULL in case of simulator device */
......
This diff is collapsed.
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2018 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef __GOYA_ASYNC_EVENTS_H_
#define __GOYA_ASYNC_EVENTS_H_

/*
 * enum goya_async_event_id - IDs of asynchronous events the Goya device can
 * report to the driver.
 *
 * The numeric values are fixed by the device/firmware interface and must not
 * be changed. Gaps between values are intentional (reserved/unused IDs).
 * Groups are recognizable from the names: ECC errors, SRAM errors, decoder
 * (DEC) errors, PLLs, PCIe events, per-engine monitors (BMON/SPMU), MMU
 * faults, DDR events, QMAN/CMDQ errors and DMA channel events.
 *
 * GOYA_ASYNC_EVENT_ID_LAST_VALID_ID caps the valid ID range at 1023;
 * GOYA_ASYNC_EVENT_ID_SIZE is the size needed for an array indexed by
 * event ID.
 */
enum goya_async_event_id {
	GOYA_ASYNC_EVENT_ID_PCIE_IF = 33,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC = 36,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC = 39,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC = 42,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC = 45,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC = 48,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC = 51,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC = 54,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC = 57,
	GOYA_ASYNC_EVENT_ID_MME_ECC = 60,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT = 61,
	GOYA_ASYNC_EVENT_ID_MMU_ECC = 63,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO = 64,
	GOYA_ASYNC_EVENT_ID_DMA_ECC = 66,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC = 75,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM = 78,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT = 79,
	/* SRAM bank error events, one per bank */
	GOYA_ASYNC_EVENT_ID_SRAM0 = 81,
	GOYA_ASYNC_EVENT_ID_SRAM1 = 82,
	GOYA_ASYNC_EVENT_ID_SRAM2 = 83,
	GOYA_ASYNC_EVENT_ID_SRAM3 = 84,
	GOYA_ASYNC_EVENT_ID_SRAM4 = 85,
	GOYA_ASYNC_EVENT_ID_SRAM5 = 86,
	GOYA_ASYNC_EVENT_ID_SRAM6 = 87,
	GOYA_ASYNC_EVENT_ID_SRAM7 = 88,
	GOYA_ASYNC_EVENT_ID_SRAM8 = 89,
	GOYA_ASYNC_EVENT_ID_SRAM9 = 90,
	GOYA_ASYNC_EVENT_ID_SRAM10 = 91,
	GOYA_ASYNC_EVENT_ID_SRAM11 = 92,
	GOYA_ASYNC_EVENT_ID_SRAM12 = 93,
	GOYA_ASYNC_EVENT_ID_SRAM13 = 94,
	GOYA_ASYNC_EVENT_ID_SRAM14 = 95,
	GOYA_ASYNC_EVENT_ID_SRAM15 = 96,
	GOYA_ASYNC_EVENT_ID_SRAM16 = 97,
	GOYA_ASYNC_EVENT_ID_SRAM17 = 98,
	GOYA_ASYNC_EVENT_ID_SRAM18 = 99,
	GOYA_ASYNC_EVENT_ID_SRAM19 = 100,
	GOYA_ASYNC_EVENT_ID_SRAM20 = 101,
	GOYA_ASYNC_EVENT_ID_SRAM21 = 102,
	GOYA_ASYNC_EVENT_ID_SRAM22 = 103,
	GOYA_ASYNC_EVENT_ID_SRAM23 = 104,
	GOYA_ASYNC_EVENT_ID_SRAM24 = 105,
	GOYA_ASYNC_EVENT_ID_SRAM25 = 106,
	GOYA_ASYNC_EVENT_ID_SRAM26 = 107,
	GOYA_ASYNC_EVENT_ID_SRAM27 = 108,
	GOYA_ASYNC_EVENT_ID_SRAM28 = 109,
	GOYA_ASYNC_EVENT_ID_SRAM29 = 110,
	GOYA_ASYNC_EVENT_ID_GIC500 = 112,
	/* Decoder error events, one per engine */
	GOYA_ASYNC_EVENT_ID_PCIE_DEC = 115,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC = 117,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC = 120,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC = 123,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC = 126,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC = 129,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC = 132,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC = 135,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC = 138,
	GOYA_ASYNC_EVENT_ID_AXI_ECC = 139,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC = 140,
	GOYA_ASYNC_EVENT_ID_MME_WACS = 141,
	GOYA_ASYNC_EVENT_ID_MME_WACSD = 142,
	/* PLL events (note: no PLL2 ID in this interface) */
	GOYA_ASYNC_EVENT_ID_PLL0 = 143,
	GOYA_ASYNC_EVENT_ID_PLL1 = 144,
	GOYA_ASYNC_EVENT_ID_PLL3 = 146,
	GOYA_ASYNC_EVENT_ID_PLL4 = 147,
	GOYA_ASYNC_EVENT_ID_PLL5 = 148,
	GOYA_ASYNC_EVENT_ID_PLL6 = 149,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER = 155,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC = 159,
	GOYA_ASYNC_EVENT_ID_PSOC = 160,
	GOYA_ASYNC_EVENT_ID_PCIE_FLR = 171,
	GOYA_ASYNC_EVENT_ID_PCIE_HOT_RESET = 172,
	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG0 = 174,
	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG1 = 175,
	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG2 = 176,
	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG3 = 177,
	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG0 = 178,
	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG1 = 179,
	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG2 = 180,
	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG3 = 181,
	GOYA_ASYNC_EVENT_ID_PCIE_APB = 182,
	GOYA_ASYNC_EVENT_ID_PCIE_QDB = 183,
	GOYA_ASYNC_EVENT_ID_PCIE_BM_D_P_WR = 184,
	GOYA_ASYNC_EVENT_ID_PCIE_BM_D_RD = 185,
	GOYA_ASYNC_EVENT_ID_PCIE_BM_U_P_WR = 186,
	GOYA_ASYNC_EVENT_ID_PCIE_BM_U_RD = 187,
	/* Per-TPC bus monitor / perf-monitor and kernel error events */
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU = 190,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR = 191,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU = 200,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR = 201,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU = 210,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR = 211,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU = 220,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR = 221,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU = 230,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR = 231,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU = 240,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR = 241,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU = 250,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR = 251,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU = 260,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR = 261,
	GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU0 = 270,
	GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU1 = 271,
	GOYA_ASYNC_EVENT_ID_MME_WACS_UP = 272,
	GOYA_ASYNC_EVENT_ID_MME_WACS_DOWN = 273,
	GOYA_ASYNC_EVENT_ID_MMU_PAGE_FAULT = 280,
	GOYA_ASYNC_EVENT_ID_MMU_WR_PERM = 281,
	GOYA_ASYNC_EVENT_ID_MMU_DBG_BM = 282,
	/* DMA channel bus-monitor events */
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 = 290,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1 = 291,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2 = 292,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3 = 293,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 = 294,
	/* DDR controller events, one set per controller */
	GOYA_ASYNC_EVENT_ID_DDR0_PHY_DFI = 300,
	GOYA_ASYNC_EVENT_ID_DDR0_ECC_SCRUB = 301,
	GOYA_ASYNC_EVENT_ID_DDR0_DB_ECC = 302,
	GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC = 303,
	GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC_MC = 304,
	GOYA_ASYNC_EVENT_ID_DDR0_AXI_RD = 305,
	GOYA_ASYNC_EVENT_ID_DDR0_AXI_WR = 306,
	GOYA_ASYNC_EVENT_ID_DDR1_PHY_DFI = 310,
	GOYA_ASYNC_EVENT_ID_DDR1_ECC_SCRUB = 311,
	GOYA_ASYNC_EVENT_ID_DDR1_DB_ECC = 312,
	GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC = 313,
	GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC_MC = 314,
	GOYA_ASYNC_EVENT_ID_DDR1_AXI_RD = 315,
	GOYA_ASYNC_EVENT_ID_DDR1_AXI_WR = 316,
	GOYA_ASYNC_EVENT_ID_CPU_BMON = 320,
	/* Thermal sensor events */
	GOYA_ASYNC_EVENT_ID_TS_EAST = 322,
	GOYA_ASYNC_EVENT_ID_TS_WEST = 323,
	GOYA_ASYNC_EVENT_ID_TS_NORTH = 324,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_0 = 330,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_1 = 331,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_2 = 332,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET = 356,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT = 361,
	/* Per-engine command queue (CMDQ) and queue manager (QM) errors */
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ = 430,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ = 431,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ = 432,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ = 433,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ = 434,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ = 435,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ = 436,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ = 437,
	GOYA_ASYNC_EVENT_ID_TPC0_QM = 438,
	GOYA_ASYNC_EVENT_ID_TPC1_QM = 439,
	GOYA_ASYNC_EVENT_ID_TPC2_QM = 440,
	GOYA_ASYNC_EVENT_ID_TPC3_QM = 441,
	GOYA_ASYNC_EVENT_ID_TPC4_QM = 442,
	GOYA_ASYNC_EVENT_ID_TPC5_QM = 443,
	GOYA_ASYNC_EVENT_ID_TPC6_QM = 444,
	GOYA_ASYNC_EVENT_ID_TPC7_QM = 445,
	GOYA_ASYNC_EVENT_ID_MME_QM = 447,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ = 448,
	GOYA_ASYNC_EVENT_ID_DMA0_QM = 449,
	GOYA_ASYNC_EVENT_ID_DMA1_QM = 450,
	GOYA_ASYNC_EVENT_ID_DMA2_QM = 451,
	GOYA_ASYNC_EVENT_ID_DMA3_QM = 452,
	GOYA_ASYNC_EVENT_ID_DMA4_QM = 453,
	GOYA_ASYNC_EVENT_ID_DMA_ON_HBW = 454,
	GOYA_ASYNC_EVENT_ID_DMA0_CH = 455,
	GOYA_ASYNC_EVENT_ID_DMA1_CH = 456,
	GOYA_ASYNC_EVENT_ID_DMA2_CH = 457,
	GOYA_ASYNC_EVENT_ID_DMA3_CH = 458,
	GOYA_ASYNC_EVENT_ID_DMA4_CH = 459,
	/* Host-to-device doorbell / control events */
	GOYA_ASYNC_EVENT_ID_PI_UPDATE = 484,
	GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
	GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
	GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
	GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
	GOYA_ASYNC_EVENT_ID_SIZE
};

#endif /* __GOYA_ASYNC_EVENTS_H_ */
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2017-2018 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef GOYA_PACKETS_H
#define GOYA_PACKETS_H

#include <linux/types.h>

/* Packet ID field location inside the 64-bit packet header */
#define PACKET_HEADER_PACKET_ID_SHIFT		56
#define PACKET_HEADER_PACKET_ID_MASK		0x1F00000000000000ull

/*
 * enum packet_id - opcodes of the packets a QMAN can execute.
 * The values are fixed by the H/W; note 0x0 and 0x6 are not used.
 */
enum packet_id {
	PACKET_WREG_32 = 0x1,
	PACKET_WREG_BULK = 0x2,
	PACKET_MSG_LONG = 0x3,
	PACKET_MSG_SHORT = 0x4,
	PACKET_CP_DMA = 0x5,
	PACKET_MSG_PROT = 0x7,
	PACKET_FENCE = 0x8,
	PACKET_LIN_DMA = 0x9,
	PACKET_NOP = 0xA,
	PACKET_STOP = 0xB,
	MAX_PACKET_ID = (PACKET_HEADER_PACKET_ID_MASK >>
				PACKET_HEADER_PACKET_ID_SHIFT) + 1
};

/*
 * enum goya_dma_direction - source/destination memory-space combination of a
 * LIN_DMA transfer (encoded in the DMA_DIR field of the packet ctl word).
 */
enum goya_dma_direction {
	DMA_HOST_TO_DRAM,
	DMA_HOST_TO_SRAM,
	DMA_DRAM_TO_SRAM,
	DMA_SRAM_TO_DRAM,
	DMA_SRAM_TO_HOST,
	DMA_DRAM_TO_HOST,
	DMA_DRAM_TO_DRAM,
	DMA_SRAM_TO_SRAM,
	DMA_ENUM_MAX
};

/* Fields common to every packet's ctl word: opcode and barrier bits */
#define GOYA_PKT_CTL_OPCODE_SHIFT	24
#define GOYA_PKT_CTL_OPCODE_MASK	0x1F000000

#define GOYA_PKT_CTL_EB_SHIFT		29
#define GOYA_PKT_CTL_EB_MASK		0x20000000

#define GOYA_PKT_CTL_RB_SHIFT		30
#define GOYA_PKT_CTL_RB_MASK		0x40000000

#define GOYA_PKT_CTL_MB_SHIFT		31
#define GOYA_PKT_CTL_MB_MASK		0x80000000

/* All packet layouts below are little-endian, as consumed by the device */

struct packet_nop {
	__le32 reserved;
	__le32 ctl;
};

struct packet_stop {
	__le32 reserved;
	__le32 ctl;
};

#define GOYA_PKT_WREG32_CTL_REG_OFFSET_SHIFT	0
#define GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK	0x0000FFFF

struct packet_wreg32 {
	__le32 value;
	__le32 ctl;
};

struct packet_wreg_bulk {
	__le32 size64;
	__le32 ctl;
	__le64 values[];	/* flexible array - data starts here */
};

struct packet_msg_long {
	__le32 value;
	__le32 ctl;
	__le64 addr;
};

struct packet_msg_short {
	__le32 value;
	__le32 ctl;
};

struct packet_msg_prot {
	__le32 value;
	__le32 ctl;
	__le64 addr;
};

struct packet_fence {
	__le32 cfg;
	__le32 ctl;
};

/* LIN_DMA-specific bits of the ctl word */
#define GOYA_PKT_LIN_DMA_CTL_WO_SHIFT		0
#define GOYA_PKT_LIN_DMA_CTL_WO_MASK		0x00000001

#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_SHIFT	1
#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK	0x00000002

#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_SHIFT	2
#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK	0x00000004

#define GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT	6
#define GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK	0x00000040

#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT	20
#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK	0x00700000

struct packet_lin_dma {
	__le32 tsize;
	__le32 ctl;
	__le64 src_addr;
	__le64 dst_addr;
};

struct packet_cp_dma {
	__le32 tsize;
	__le32 ctl;
	__le64 src_addr;
};

#endif /* GOYA_PACKETS_H */
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2016-2018 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef QMAN_IF_H
#define QMAN_IF_H

#include <linux/types.h>

/*
 * PRIMARY QUEUE
 */

/*
 * struct hl_bd - buffer descriptor, the entry format of a primary queue.
 * All fields are little-endian, as consumed by the device.
 *
 * @ptr: 64-bit pointer field (presumably the address of the command buffer
 *       to execute - confirm against the QMAN spec)
 * @len: length field of the descriptor
 * @ctl: control word; contains the shadow index and repeat-valid bits
 *       defined below
 */
struct hl_bd {
	__le64 ptr;
	__le32 len;
	__le32 ctl;
};

#define HL_BD_SIZE			sizeof(struct hl_bd)

/*
 * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
 * valid. 1 means the repeat field is valid, 0 means not-valid,
 * i.e. repeat == 1
 */
#define BD_CTL_REPEAT_VALID_SHIFT	24
#define BD_CTL_REPEAT_VALID_MASK	0x01000000

#define BD_CTL_SHADOW_INDEX_SHIFT	0
#define BD_CTL_SHADOW_INDEX_MASK	0x00000FFF

/*
 * COMPLETION QUEUE
 */

/*
 * struct hl_cq_entry - the entry format of a completion queue.
 *
 * @data: single little-endian word holding the ready bit, the
 *        shadow-index-valid bit and the shadow index (see masks below)
 */
struct hl_cq_entry {
	__le32 data;
};

#define HL_CQ_ENTRY_SIZE		sizeof(struct hl_cq_entry)

/* Ownership bit: set by the device when the entry holds a new completion */
#define CQ_ENTRY_READY_SHIFT			31
#define CQ_ENTRY_READY_MASK			0x80000000

#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT	30
#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK	0x40000000

/* The CQ shadow index echoes the BD's shadow index, hence the shared masks */
#define CQ_ENTRY_SHADOW_INDEX_SHIFT		BD_CTL_SHADOW_INDEX_SHIFT
#define CQ_ENTRY_SHADOW_INDEX_MASK		BD_CTL_SHADOW_INDEX_MASK

#endif /* QMAN_IF_H */
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#include <linux/irqreturn.h>
/*
 * hl_cq_inc_ptr - advance a completion queue index (ci or pi) by one
 *
 * @ptr: current ci or pi value of the completion queue
 *
 * Returns ptr + 1, wrapping around to 0 once it reaches HL_CQ_LENGTH
 * (the number of entries in the completion queue).
 */
inline u32 hl_cq_inc_ptr(u32 ptr)
{
	u32 next = ptr + 1;

	if (unlikely(next == HL_CQ_LENGTH))
		next = 0;

	return next;
}
/*
 * hl_irq_handler_cq - irq handler for completion queue
 *
 * @irq: irq number
 * @arg: pointer to completion queue structure
 *
 * Drains all ready entries from the completion queue: for each entry with a
 * valid shadow index, schedules the matching job's finish_work on the
 * device's CQ workqueue, then advances the H/W queue ci, clears the entry's
 * ready bit and advances the CQ ci. Always returns IRQ_HANDLED.
 */
irqreturn_t hl_irq_handler_cq(int irq, void *arg)
{
	struct hl_cq *cq = arg;
	struct hl_device *hdev = cq->hdev;
	struct hl_hw_queue *queue;
	struct hl_cs_job *job;
	bool shadow_index_valid;
	u16 shadow_index;
	u32 *cq_entry;
	u32 *cq_base;

	/* Late IRQ after the device was disabled - nothing to process */
	if (hdev->disabled) {
		dev_dbg(hdev->dev,
			"Device disabled but received IRQ %d for CQ %d\n",
			irq, cq->hw_queue_id);
		return IRQ_HANDLED;
	}

	cq_base = (u32 *) (uintptr_t) cq->kernel_address;

	/* Consume entries until we find one whose ready bit is clear */
	while (1) {
		bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK)
					>> CQ_ENTRY_READY_SHIFT);

		if (!entry_ready)
			break;

		cq_entry = (u32 *) &cq_base[cq->ci];

		/*
		 * Make sure we read CQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		shadow_index_valid =
			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);

		shadow_index = (u16)
			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK)
					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);

		queue = &hdev->kernel_queues[cq->hw_queue_id];

		/* Hand the completed job to process context for finalization */
		if ((shadow_index_valid) && (!hdev->disabled)) {
			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
			queue_work(hdev->cq_wq, &job->finish_work);
		}

		/*
		 * Update ci of the context's queue. There is no
		 * need to protect it with spinlock because this update is
		 * done only inside IRQ and there is a different IRQ per
		 * queue
		 */
		queue->ci = hl_queue_inc_ptr(queue->ci);

		/* Clear CQ entry ready bit */
		cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK;

		cq->ci = hl_cq_inc_ptr(cq->ci);

		/* Increment free slots */
		atomic_inc(&cq->free_slots_cnt);
	}

	return IRQ_HANDLED;
}
/*
 * hl_cq_init - main initialization function for a cq object
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure
 * @hw_queue_id: The H/W queue ID this completion queue belongs to
 *
 * Allocates zeroed dma-able memory for the completion queue ring and
 * initializes the cq fields (indices start at 0, all slots free).
 * Returns 0 on success, -ENOMEM if the DMA allocation fails.
 */
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
{
	void *va;

	/* The whole ring must fit in a single page */
	BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);

	va = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
	if (!va)
		return -ENOMEM;

	q->hdev = hdev;
	q->hw_queue_id = hw_queue_id;
	q->kernel_address = (u64) (uintptr_t) va;
	q->pi = 0;
	q->ci = 0;

	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);

	return 0;
}
/*
 * hl_cq_fini - destroy completion queue
 *
 * @hdev: pointer to device structure
 * @q: pointer to cq structure that was initialized by hl_cq_init
 *
 * Free the completion queue's coherent DMA memory. The caller must ensure
 * the queue's IRQ can no longer fire before calling this.
 */
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
	hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
			(void *) (uintptr_t) q->kernel_address, q->bus_address);
}
...@@ -17,6 +17,35 @@ ...@@ -17,6 +17,35 @@
*/ */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */
/*
* Queue Numbering
*
* The external queues (DMA channels + CPU) MUST be before the internal queues
* and each group (DMA channels + CPU and internal) must be contiguous inside
* itself but there can be a gap between the two groups (although not
* recommended)
*/
enum goya_queue_id {
GOYA_QUEUE_ID_DMA_0 = 0,
GOYA_QUEUE_ID_DMA_1,
GOYA_QUEUE_ID_DMA_2,
GOYA_QUEUE_ID_DMA_3,
GOYA_QUEUE_ID_DMA_4,
GOYA_QUEUE_ID_CPU_PQ,
GOYA_QUEUE_ID_MME,
GOYA_QUEUE_ID_TPC0,
GOYA_QUEUE_ID_TPC1,
GOYA_QUEUE_ID_TPC2,
GOYA_QUEUE_ID_TPC3,
GOYA_QUEUE_ID_TPC4,
GOYA_QUEUE_ID_TPC5,
GOYA_QUEUE_ID_TPC6,
GOYA_QUEUE_ID_TPC7,
GOYA_QUEUE_ID_SIZE
};
/* Opcode to create a new command buffer */ /* Opcode to create a new command buffer */
#define HL_CB_OP_CREATE 0 #define HL_CB_OP_CREATE 0
/* Opcode to destroy previously created command buffer */ /* Opcode to destroy previously created command buffer */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment