Commit f4e7906d authored by Ohad Sharabi, committed by Oded Gabbay

habanalabs: use variable poll interval for fw loading

Using a variable poll interval for fw loading allows us to support
much slower environments (emulation) while changing only a single
line in the code, instead of choosing a different interval in each
function that polls.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 8f82ff75
...@@ -15,8 +15,6 @@ ...@@ -15,8 +15,6 @@
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
#define FW_CPU_STATUS_POLL_INTERVAL_USEC 10000
static char *extract_fw_ver_from_str(const char *fw_str) static char *extract_fw_ver_from_str(const char *fw_str)
{ {
char *str, *fw_ver, *whitespace; char *str, *fw_ver, *whitespace;
...@@ -1102,7 +1100,7 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev, ...@@ -1102,7 +1100,7 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
timeout); timeout);
if (rc) { if (rc) {
...@@ -1286,11 +1284,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, ...@@ -1286,11 +1284,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
{ {
int rc; int rc;
/* pldm was added for cases in which we use preboot on pldm and want if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
* to load boot fit, but we can't wait for preboot because it runs
* very slowly
*/
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
return 0; return 0;
/* /*
...@@ -1436,7 +1430,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev, ...@@ -1436,7 +1430,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
le32_to_cpu(dyn_regs->cpu_cmd_status_to_host), le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
status, status,
FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status, FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
timeout); timeout);
if (rc) { if (rc) {
...@@ -2070,7 +2064,7 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev, ...@@ -2070,7 +2064,7 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
status, status,
(status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_SRAM_AVAIL), (status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
dyn_loader->wait_for_bl_timeout); dyn_loader->wait_for_bl_timeout);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to wait for boot\n"); dev_err(hdev->dev, "failed to wait for boot\n");
...@@ -2097,7 +2091,7 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev, ...@@ -2097,7 +2091,7 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status), le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
status, status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL), (status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
fw_loader->cpu_timeout); fw_loader->cpu_timeout);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to wait for Linux\n"); dev_err(hdev->dev, "failed to wait for Linux\n");
...@@ -2296,6 +2290,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, ...@@ -2296,6 +2290,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err; goto protocol_err;
} }
/*
* when testing FW load (without Linux) on PLDM we don't want to
* wait until boot fit is active as it may take several hours.
* instead, we load the bootfit and let it do all initializations in
* the background.
*/
if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
return 0;
rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader); rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
if (rc) if (rc)
goto protocol_err; goto protocol_err;
...@@ -2388,7 +2391,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, ...@@ -2388,7 +2391,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg, cpu_boot_status_reg,
status, status,
status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout); fw_loader->boot_fit_timeout);
if (rc) { if (rc) {
...@@ -2411,7 +2414,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, ...@@ -2411,7 +2414,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_msg_status_reg, cpu_msg_status_reg,
status, status,
status == CPU_MSG_OK, status == CPU_MSG_OK,
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
fw_loader->boot_fit_timeout); fw_loader->boot_fit_timeout);
if (rc) { if (rc) {
...@@ -2440,7 +2443,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, ...@@ -2440,7 +2443,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
(status == CPU_BOOT_STATUS_NIC_FW_RDY) || (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
(status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_SRAM_AVAIL), (status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
cpu_timeout); cpu_timeout);
dev_dbg(hdev->dev, "uboot status = %d\n", status); dev_dbg(hdev->dev, "uboot status = %d\n", status);
...@@ -2489,7 +2492,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, ...@@ -2489,7 +2492,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg, cpu_boot_status_reg,
status, status,
(status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED), (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
cpu_timeout); cpu_timeout);
if (rc) { if (rc) {
...@@ -2509,7 +2512,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev, ...@@ -2509,7 +2512,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
cpu_boot_status_reg, cpu_boot_status_reg,
status, status,
(status == CPU_BOOT_STATUS_SRAM_AVAIL), (status == CPU_BOOT_STATUS_SRAM_AVAIL),
FW_CPU_STATUS_POLL_INTERVAL_USEC, hdev->fw_poll_interval_usec,
cpu_timeout); cpu_timeout);
/* Clear message */ /* Clear message */
......
...@@ -61,6 +61,9 @@ ...@@ -61,6 +61,9 @@
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
#define HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC 300000000 /* 300s */
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ #define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
...@@ -2459,6 +2462,7 @@ struct multi_cs_data { ...@@ -2459,6 +2462,7 @@ struct multi_cs_data {
* @last_open_session_duration_jif: duration (jiffies) of the last device open * @last_open_session_duration_jif: duration (jiffies) of the last device open
* session. * session.
* @open_counter: number of successful device open operations. * @open_counter: number of successful device open operations.
* @fw_poll_interval_usec: FW status poll interval in usec.
* @in_reset: is device in reset flow. * @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile. * @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card * @card_type: Various ASICs have several card types. This indicates the card
...@@ -2607,6 +2611,7 @@ struct hl_device { ...@@ -2607,6 +2611,7 @@ struct hl_device {
u64 last_successful_open_jif; u64 last_successful_open_jif;
u64 last_open_session_duration_jif; u64 last_open_session_duration_jif;
u64 open_counter; u64 open_counter;
u64 fw_poll_interval_usec;
atomic_t in_reset; atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile; enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type; enum cpucp_card_types card_type;
......
...@@ -345,6 +345,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -345,6 +345,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
set_driver_behavior_per_device(hdev); set_driver_behavior_per_device(hdev);
hdev->fw_poll_interval_usec = hdev->pldm ? HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC :
HL_FW_STATUS_POLL_INTERVAL_USEC;
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment