Commit be24dd48 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to ask the driver
  to export a DMA-BUF object that represents a memory region on
  the device's DRAM. This is needed to enable peer-to-peer over PCIe
  between a Habana device and an RDMA adapter (e.g. an mlx5 or EFA
  RDMA adapter); see the sketch after this list.

- Add a debugfs node to dynamically configure the CS timeout. Until now,
  it was only configurable through a kernel module parameter.

- Fetch more comprehensive power information from the firmware.

- Always take a timestamp when waiting for a user interrupt, as the user
  needs that information to optimize the graph runtime compilation.

- Modify the user interrupt to treat the user fence value as 64-bit,
  instead of 32-bit.

- Bypass reset in case of a repeated h/w error event after device reset.
  This prevents an endless loop of device resets.

- Fix several bugs in the multi-CS completion code.

- Fix a race condition in fd close/open.

- Update to the latest firmware headers.

- Add select CRC32 in Kconfig.

- Small fixes and cosmetics.
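
For illustration, a minimal userspace sketch of the new export flow
(hl_export_dmabuf is a hypothetical helper name; it assumes the
HL_IOCTL_MEMORY entry point and the uAPI additions from this series):

    #include <fcntl.h>             /* O_RDWR, O_CLOEXEC */
    #include <sys/ioctl.h>
    #include <misc/habanalabs.h>   /* union hl_mem_args, HL_IOCTL_MEMORY */

    /* Hypothetical helper: export a DMA-BUF FD for device memory that was
     * previously allocated with HL_MEM_OP_ALLOC. On Gaudi, which has no
     * MMU for device memory, 'handle' is a physical DRAM address and the
     * size must be passed explicitly.
     */
    static int hl_export_dmabuf(int dev_fd, __u64 handle, __u64 size)
    {
            union hl_mem_args args = {0};

            args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
            args.in.flags = O_RDWR | O_CLOEXEC;        /* DMA-BUF file flags */
            args.in.export_dmabuf_fd.handle = handle;
            args.in.export_dmabuf_fd.mem_size = size;  /* Gaudi only */

            if (ioctl(dev_fd, HL_IOCTL_MEMORY, &args))
                    return -1;

            return args.out.fd;    /* pass to the importing driver (e.g. RDMA) */
    }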

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
parents 2b74240b b2faac38
......@@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
......
......@@ -8,6 +8,8 @@ config HABANA_AI
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
select DMA_SHARED_BUFFER
select CRC32
help
Enables PCIe card driver for Habana's AI Processors (AIP) that are
designed to accelerate Deep Learning inference and training workloads.
......
......@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o
common/state_dump.o common/hwmgr.o
......@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
fence->mcs_handling_done = false;
init_completion(&fence->completion);
}
......@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* Don't cancel TDR in case this CS timed out, because we might be
 * running from the TDR context
 */
if (cs && (cs->timedout ||
hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
return;
if (cs && cs->tdr_active)
if (cs->tdr_active)
cancel_delayed_work_sync(&cs->work_tdr);
spin_lock(&hdev->cs_mirror_lock);
......@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->timestamp =
ktime_to_ns(fence->timestamp);
complete_all(&mcs_compl->completion);
/*
* Setting mcs_handling_done inside the lock ensures
* at least one fence has mcs_handling_done set to
* true before the wait for mcs finishes. This ensures at
* least one CS will be seen as completed when polling
* the mcs fences.
*/
fence->mcs_handling_done = true;
}
spin_unlock(&mcs_compl->lock);
}
/* In case CS completed without mcs completion initialized */
fence->mcs_handling_done = true;
}
static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
......@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
break;
}
mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
if (status == CS_WAIT_STATUS_BUSY)
continue;
mcs_data->completion_bitmap |= BIT(i);
/*
* best effort to extract the timestamp. a few notes:
* - if even a single fence is gone we cannot extract the timestamp
*   (as the fence no longer exists)
* - for all completed CSs we take the earliest timestamp.
*   for this we have to validate that:
*   1. the given timestamp was indeed set
*   2. the timestamp is the earliest of all timestamps so far
*/
switch (status) {
case CS_WAIT_STATUS_BUSY:
/* CS has not finished, keep waiting on its QID */
mcs_data->stream_master_qid_map |=
fence->stream_master_qid_map;
break;
case CS_WAIT_STATUS_COMPLETED:
/*
* Use mcs_handling_done to avoid the possibility of mcs_data
* being returned to the user, indicating the CS completed,
* before it finished all of its mcs handling; this avoids a
* race the next time the user waits for mcs.
*/
if (!fence->mcs_handling_done)
break;
if (status == CS_WAIT_STATUS_GONE) {
mcs_data->completion_bitmap |= BIT(i);
/*
* For all completed CSs we take the earliest timestamp.
* For this we have to validate that the timestamp is
* the earliest of all timestamps so far.
*/
if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp, first_cs_time) < 0))
first_cs_time = fence->timestamp;
break;
case CS_WAIT_STATUS_GONE:
mcs_data->update_ts = false;
mcs_data->gone_cs = true;
} else if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp,
ktime_set(0, 0)) > 0) &&
(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
first_cs_time = fence->timestamp;
/*
* It is possible to get old sequence numbers from the user
* which relate to already-completed CSs whose fences are
* already gone. In this case the CS is marked as completed,
* but there is no need to consider its QID for mcs completion.
*/
mcs_data->completion_bitmap |= BIT(i);
break;
default:
dev_err(hdev->dev, "Invalid fence status\n");
return -EINVAL;
}
}
hl_fences_put(mcs_data->fence_arr, arr_len);
......@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u32 timeout_us, u64 user_address,
u32 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status)
u64 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status,
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
u32 completion_value;
u64 completion_value;
long completion_rc;
int rc = 0;
......@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
goto remove_pending_user_interrupt;
}
if (completion_value >= target_value)
if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
else
/* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get();
} else
*status = CS_WAIT_STATUS_BUSY;
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
......@@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
......@@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*timestamp = ktime_to_ns(pend->fence.timestamp);
kfree(pend);
hl_ctx_put(ctx);
......@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
struct asic_fixed_properties *prop;
union hl_wait_cs_args *args = data;
enum hl_cs_wait_status status;
u64 timestamp;
int rc;
prop = &hdev->asic_prop;
......@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt_offset, &status);
args->in.target, interrupt_offset, &status,
&timestamp);
if (rc) {
if (rc != -EINTR)
......@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
if (timestamp) {
args->out.timestamp_nsec = timestamp;
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
}
switch (status) {
case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
......
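For illustration, a hypothetical userspace sketch of the updated
wait-for-interrupt flow (field and flag names per the uAPI hunks later
in this diff; encoding of the interrupt index in the flags field is
elided):

    #include <sys/ioctl.h>
    #include <misc/habanalabs.h>   /* union hl_wait_cs_args, HL_IOCTL_WAIT_CS */

    /* Wait until the 64-bit value at 'addr' reaches 'target', or the
     * timeout expires; on return, pick up the completion timestamp if
     * the driver marked it valid.
     */
    static int wait_on_user_fence(int dev_fd, __u64 addr, __u64 target,
                                  __u32 timeout_us, __u64 *ts_nsec)
    {
            union hl_wait_cs_args args = {0};

            args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT; /* interrupt id bits elided */
            args.in.addr = addr;
            args.in.target = target;                    /* now 64-bit */
            args.in.interrupt_timeout_us = timeout_us;

            if (ioctl(dev_fd, HL_IOCTL_WAIT_CS, &args))
                    return -1;

            if (ts_nsec && (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD))
                    *ts_nsec = args.out.timestamp_nsec;

            return args.out.status; /* e.g. HL_WAIT_CS_STATUS_COMPLETED */
    }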
......@@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
return rc;
}
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
return;
}
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
int rc = 0;
......@@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id)
hl_ctx_free(hdev, ctx);
kref_put(&ctx->refcount, hl_ctx_do_release);
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->ctx_lock);
......
......@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
return count;
}
static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[200];
ssize_t rc;
if (*ppos)
return 0;
sprintf(tmp_buf, "%d\n",
jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
strlen(tmp_buf) + 1);
return rc;
}
static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u32 value;
ssize_t rc;
rc = kstrtouint_from_user(buf, count, 10, &value);
if (rc)
return rc;
if (value)
hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
return count;
}
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
......@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
.write = hl_state_dump_write
};
static const struct file_operations hl_timeout_locked_fops = {
.owner = THIS_MODULE,
.read = hl_timeout_locked_read,
.write = hl_timeout_locked_write
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
......@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_state_dump_fops);
debugfs_create_file("timeout_locked",
0644,
dev_entry->root,
dev_entry,
&hl_timeout_locked_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
......
......@@ -69,13 +69,6 @@ static void hpriv_release(struct kref *ref)
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_lock(&hdev->fpriv_list_lock);
list_del(&hpriv->dev_node);
hdev->compute_ctx = NULL;
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
idle_mask,
......@@ -87,9 +80,32 @@ static void hpriv_release(struct kref *ref)
device_is_idle = false;
}
/* We need to remove the user from the list to make sure the reset process won't
 * try to kill the user process. Because if we got here, it means there are no
 * more driver/device resources that the user process is occupying, so there is
 * no need to kill it
 *
 * However, we can't set compute_ctx to NULL at this stage. This is to prevent
 * a race between the release and opening the device again. We don't want to let
 * a user open the device while a reset is about to happen.
 */
mutex_lock(&hdev->fpriv_list_lock);
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|| hdev->reset_upon_device_release)
hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
/* Now we can mark compute_ctx as empty. Even if a reset is running in a different
 * thread, we don't care, because in_reset is marked, so if a user tries to open
 * the device it will fail on that, even though compute_ctx is NULL.
 */
mutex_lock(&hdev->fpriv_list_lock);
hdev->compute_ctx = NULL;
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
......@@ -530,6 +546,19 @@ static void hl_device_heartbeat(struct work_struct *work)
return;
reschedule:
/*
* prev_reset_trigger tracks consecutive fatal h/w errors until the first
* heartbeat immediately post reset.
* If control reached here, then at least one heartbeat work has been
* scheduled since the last reset/init cycle.
* So if the device is not already in a reset cycle, reset the
* prev_reset_trigger flag, as no reset occurred with HL_RESET_FW_FATAL_ERR
* status for at least one heartbeat. From this point the driver restarts
* tracking future consecutive fatal errors.
*/
if (!(atomic_read(&hdev->in_reset)))
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
......@@ -909,6 +938,65 @@ static void device_disable_open_processes(struct hl_device *hdev)
mutex_unlock(&hdev->fpriv_list_lock);
}
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT) {
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
cur_reset_trigger = HL_RESET_HEARTBEAT;
} else if (flags & HL_RESET_TDR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
cur_reset_trigger = HL_RESET_TDR;
} else if (flags & HL_RESET_FW_FATAL_ERR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
} else {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
/*
* If the reset cause is the same twice in a row, then reset_trigger_repeated
* is set, and if this reset is due to a fatal FW error the
* device is put into an unstable state.
*/
if (hdev->prev_reset_trigger != cur_reset_trigger) {
hdev->prev_reset_trigger = cur_reset_trigger;
hdev->reset_trigger_repeated = 0;
} else {
hdev->reset_trigger_repeated = 1;
}
/* If reset is due to heartbeat, the device CPU is not responsive, in
* which case there is no point sending it a PCI disable message.
*
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if ((flags & HL_RESET_HARD) &&
!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
/* Disable PCI access from the device F/W so it won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
* sending us interrupts after that. We need to disable
* the access here because if the device is marked
* disabled, the message won't be sent. Also, in case
* of heartbeat, the device CPU is marked as disabled
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev,
CPUCP_PACKET_DISABLE_PCI_ACCESS))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
}
}
/*
* hl_device_reset - reset the device
*
......@@ -954,7 +1042,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
goto do_reset;
}
if (!hard_reset && !hdev->allow_external_soft_reset) {
if (!hard_reset && !hdev->allow_inference_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
......@@ -978,47 +1066,21 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
if (rc)
return 0;
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT)
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
else if (flags & HL_RESET_TDR)
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
else
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
/* If reset is due to heartbeat, the device CPU is not responsive, in
* which case there is no point sending it a PCI disable message.
*
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
/* Disable PCI access from the device F/W so it won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
* sending us interrupts after that. We need to disable
* the access here because if the device is marked
* disabled, the message won't be sent. Also, in case
* of heartbeat, the device CPU is marked as disabled
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev,
CPUCP_PACKET_DISABLE_PCI_ACCESS))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
}
handle_reset_trigger(hdev, flags);
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
take_release_locks(hdev);
dev_err(hdev->dev, "Going to RESET device!\n");
if (hard_reset)
dev_info(hdev->dev, "Going to reset device\n");
else if (flags & HL_RESET_DEVICE_RELEASE)
dev_info(hdev->dev,
"Going to reset device after it was released by user\n");
else
dev_info(hdev->dev,
"Going to reset compute engines of inference device\n");
}
again:
......@@ -1108,6 +1170,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->device_cpu_disabled = false;
hdev->hard_reset_pending = false;
if (hdev->reset_trigger_repeated &&
(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
/* if there are 2 back-to-back resets from FW,
* ensure the driver puts the device in an unusable state
*/
dev_crit(hdev->dev,
"Consecutive FW fatal errors received, stopping hard reset\n");
rc = -EIO;
goto out_err;
}
if (hdev->kernel_ctx) {
dev_crit(hdev->dev,
"kernel ctx was alive during hard reset, something is terribly wrong\n");
......
......@@ -2162,18 +2162,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
}
/**
* hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause
* of the newly triggered hard reset
* hl_fw_dynamic_send_msg - send a COMMS message with attached data
*
* @hdev: pointer to the habanalabs device structure
* @fw_loader: managing structure for loading device's FW
* @reset_cause: enumerated cause for the recent hard reset
* @msg_type: message type
* @data: data to be sent
*
* @return 0 on success, otherwise non-zero error code
*/
static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
struct fw_load_mgr *fw_loader,
enum comms_reset_cause reset_cause)
static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
{
struct lkd_msg_comms msg;
int rc;
......@@ -2181,11 +2180,20 @@ static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
memset(&msg, 0, sizeof(msg));
/* create message to be sent */
msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
msg.header.type = msg_type;
msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header));
msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
msg.reset_cause = reset_cause;
switch (msg_type) {
case HL_COMMS_RESET_CAUSE_TYPE:
msg.reset_cause = *(__u8 *) data;
break;
default:
dev_err(hdev->dev,
"Send COMMS message - invalid message type %u\n",
msg_type);
return -EINVAL;
}
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
......@@ -2252,8 +2260,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err;
if (hdev->curr_reset_cause) {
rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader,
hdev->curr_reset_cause);
rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause);
if (rc)
goto protocol_err;
......
......@@ -26,6 +26,7 @@
#include <linux/sched/signal.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/coresight.h>
#include <linux/dma-buf.h>
#define HL_NAME "habanalabs"
......@@ -68,6 +69,9 @@
#define HL_STATE_DUMP_HIST_LEN 5
/* Default value for device reset trigger, an invalid value */
#define HL_RESET_TRIGGER_DEFAULT 0xFF
#define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
......@@ -132,13 +136,18 @@ enum hl_mmu_page_table_location {
* - HL_RESET_FW
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
* only when running with secured f/w
*
* - HL_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
*/
#define HL_RESET_HARD (1 << 0)
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
#define HL_RESET_HEARTBEAT (1 << 2)
#define HL_RESET_TDR (1 << 3)
#define HL_RESET_DEVICE_RELEASE (1 << 4)
#define HL_RESET_FW (1 << 5)
#define HL_RESET_FW_FATAL_ERR (1 << 6)
#define HL_MAX_SOBS_PER_MONITOR 8
......@@ -447,6 +456,9 @@ struct hl_hints_range {
* for hints validity check.
@device_dma_offset_for_host_access: the offset to add to host DMA addresses
* to enable the device to access them.
* @max_freq_value: current max clk frequency.
@clk_pll_index: clock PLL index that specifies which PLL determines the clock
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
......@@ -543,6 +555,8 @@ struct asic_fixed_properties {
u64 cb_va_end_addr;
u64 dram_hints_align_mask;
u64 device_dma_offset_for_host_access;
u64 max_freq_value;
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
......@@ -601,6 +615,9 @@ struct asic_fixed_properties {
* masters QIDs that multi cs is waiting on
* @error: mark this fence with error
* @timestamp: timestamp upon completion
@mcs_handling_done: indicates that the corresponding command submission has
finished its mcs handling; this does not mean it was part
* of the mcs
*/
struct hl_fence {
struct completion completion;
......@@ -609,6 +626,7 @@ struct hl_fence {
u32 stream_master_qid_map;
int error;
ktime_t timestamp;
u8 mcs_handling_done;
};
/**
......@@ -1352,6 +1370,23 @@ struct hl_cs_counters_atomic {
atomic64_t validation_drop_cnt;
};
/**
* struct hl_dmabuf_priv - a dma-buf private object.
* @dmabuf: pointer to dma-buf object.
* @ctx: pointer to the dma-buf owner's context.
@phys_pg_pack: pointer to the physical page pack if the dma-buf was exported
from a memory allocation handle.
* @device_address: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
*/
struct hl_dmabuf_priv {
struct dma_buf *dmabuf;
struct hl_ctx *ctx;
struct hl_vm_phys_pg_pack *phys_pg_pack;
uint64_t device_address;
};
/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
......@@ -1662,6 +1697,7 @@ struct hl_vm_hw_block_list_node {
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
* @mapping_cnt: number of shared mappings.
@exporting_cnt: number of active dma-buf exports.
* @asid: the context related to this list.
* @page_size: size of each page in the pack.
* @flags: HL_MEM_* flags related to this list.
......@@ -1676,6 +1712,7 @@ struct hl_vm_phys_pg_pack {
u64 npages;
u64 total_size;
atomic_t mapping_cnt;
u32 exporting_cnt;
u32 asid;
u32 page_size;
u32 flags;
......@@ -2396,6 +2433,7 @@ struct multi_cs_data {
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
* @dram_pci_bar_start: start bus address of PCIe bar towards DRAM.
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
......@@ -2440,8 +2478,12 @@ struct multi_cs_data {
* @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
* @allow_external_soft_reset: true if soft reset initiated by user or TDR is
* allowed.
* @allow_inference_soft_reset: true if the ASIC supports soft reset that is
* initiated by user or TDR. This is only true
in inference ASICs, as there is no real-world
use-case for soft-reset in training (because
training runs on multiple devices)
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
......@@ -2452,6 +2494,10 @@ struct multi_cs_data {
* @supports_staged_submission: true if staged submissions are supported
* @curr_reset_cause: saves an enumerated reset cause when a hard reset is
* triggered, and cleared after it is shared with preboot.
@prev_reset_trigger: saves the previous trigger which caused a reset, overridden
with a new value on the next reset
* @reset_trigger_repeated: set if device reset is triggered more than once with
* same cause.
* @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
* complete instead.
* @device_cpu_is_halted: Flag to indicate whether the device CPU was already
......@@ -2537,6 +2583,7 @@ struct hl_device {
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 dram_pci_bar_start;
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
......@@ -2572,13 +2619,15 @@ struct hl_device {
u8 collective_mon_idx;
u8 supports_coresight;
u8 supports_soft_reset;
u8 allow_external_soft_reset;
u8 allow_inference_soft_reset;
u8 supports_cb_mapping;
u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
u8 curr_reset_cause;
u8 prev_reset_trigger;
u8 reset_trigger_repeated;
u8 skip_reset_on_timeout;
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
......@@ -2956,6 +3005,15 @@ int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_clk_rate(struct hl_device *hdev,
u32 *cur_clk, u32 *max_clk);
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
......
......@@ -225,6 +225,17 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
if (!hpriv)
return -ENOMEM;
/* Prevent other routines from reading partial hpriv data by
* initializing hpriv fields before inserting it into the list
*/
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
mutex_lock(&hdev->fpriv_list_lock);
if (!hl_device_operational(hdev, NULL)) {
......@@ -238,19 +249,15 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
return 0;
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
filp->private_data = NULL;
put_pid(hpriv->taskpid);
kfree(hpriv);
return rc;
}
......@@ -339,6 +346,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
set_driver_behavior_per_device(hdev);
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
if (timeout_locked)
hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2018 HabanaLabs, Ltd.
* Copyright 2019-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "gaudiP.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "habanalabs.h"
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (freq == PLL_LAST)
hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
......@@ -33,7 +30,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
*max_clk = (value / 1000 / 1000);
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0) {
dev_err(hdev->dev,
......@@ -51,15 +48,14 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct gaudi_device *gaudi = hdev->asic_specific;
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
gaudi->max_freq_value = value;
hdev->asic_prop.max_freq_value = value;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
......@@ -68,7 +64,6 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
u64 value;
......@@ -83,9 +78,10 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
goto fail;
}
gaudi->max_freq_value = value * 1000 * 1000;
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
fail:
return count;
......@@ -100,7 +96,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
......@@ -108,14 +104,14 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
static DEVICE_ATTR_RW(clk_max_freq_mhz);
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
static struct attribute *gaudi_dev_attrs[] = {
static struct attribute *hl_dev_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
NULL,
};
void gaudi_add_device_attr(struct hl_device *hdev,
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
{
dev_attr_grp->attrs = gaudi_dev_attrs;
dev_attr_grp->attrs = hl_dev_attrs;
}
......@@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
{
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
u32 cpucp_attr;
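/* Newer F/W advertises CPU_BOOT_DEV_STS0_MAP_HWMON_EN and expects the
 * cpucp_* sensor enums; otherwise fall back to passing the raw hwmon
 * attribute values, which older F/W understands directly.
 */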
bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
......@@ -121,65 +124,134 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
cpucp_attr = cpucp_temp_input;
break;
case hwmon_temp_max:
cpucp_attr = cpucp_temp_max;
break;
case hwmon_temp_crit:
cpucp_attr = cpucp_temp_crit;
break;
case hwmon_temp_max_hyst:
cpucp_attr = cpucp_temp_max_hyst;
break;
case hwmon_temp_crit_hyst:
cpucp_attr = cpucp_temp_crit_hyst;
break;
case hwmon_temp_offset:
cpucp_attr = cpucp_temp_offset;
break;
case hwmon_temp_highest:
cpucp_attr = cpucp_temp_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_temperature(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
else
rc = hl_get_temperature(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_input:
cpucp_attr = cpucp_in_input;
break;
case hwmon_in_min:
cpucp_attr = cpucp_in_min;
break;
case hwmon_in_max:
cpucp_attr = cpucp_in_max;
break;
case hwmon_in_highest:
cpucp_attr = cpucp_in_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_voltage(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
else
rc = hl_get_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_input:
cpucp_attr = cpucp_curr_input;
break;
case hwmon_curr_min:
cpucp_attr = cpucp_curr_min;
break;
case hwmon_curr_max:
cpucp_attr = cpucp_curr_max;
break;
case hwmon_curr_highest:
cpucp_attr = cpucp_curr_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_current(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_current(hdev, channel, cpucp_attr, val);
else
rc = hl_get_current(hdev, channel, attr, val);
break;
case hwmon_fan:
switch (attr) {
case hwmon_fan_input:
cpucp_attr = cpucp_fan_input;
break;
case hwmon_fan_min:
cpucp_attr = cpucp_fan_min;
break;
case hwmon_fan_max:
cpucp_attr = cpucp_fan_max;
break;
default:
return -EINVAL;
}
rc = hl_get_fan_speed(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
else
rc = hl_get_fan_speed(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
cpucp_attr = cpucp_pwm_input;
break;
case hwmon_pwm_enable:
cpucp_attr = cpucp_pwm_enable;
break;
default:
return -EINVAL;
}
if (use_cpucp_enum)
rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
else
rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
case hwmon_power:
switch (attr) {
case hwmon_power_input:
cpucp_attr = CPUCP_POWER_INPUT;
break;
case hwmon_power_input_highest:
cpucp_attr = CPUCP_POWER_INPUT_HIGHEST;
break;
default:
return -EINVAL;
}
rc = hl_get_pwm_info(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_power(hdev, channel, cpucp_attr, val);
else
rc = hl_get_power(hdev, channel, attr, val);
break;
default:
return -EINVAL;
......@@ -191,6 +263,9 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
u32 cpucp_attr;
bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
......@@ -199,40 +274,78 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp:
switch (attr) {
case hwmon_temp_offset:
cpucp_attr = cpucp_temp_offset;
break;
case hwmon_temp_reset_history:
cpucp_attr = cpucp_temp_reset_history;
break;
default:
return -EINVAL;
}
hl_set_temperature(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_temperature(hdev, channel, cpucp_attr, val);
else
hl_set_temperature(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
cpucp_attr = cpucp_pwm_input;
break;
case hwmon_pwm_enable:
cpucp_attr = cpucp_pwm_enable;
break;
default:
return -EINVAL;
}
hl_set_pwm_info(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_pwm_info(hdev, channel, cpucp_attr, val);
else
hl_set_pwm_info(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_reset_history:
cpucp_attr = cpucp_in_reset_history;
break;
default:
return -EINVAL;
}
hl_set_voltage(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_voltage(hdev, channel, cpucp_attr, val);
else
hl_set_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_reset_history:
cpucp_attr = cpucp_curr_reset_history;
break;
default:
return -EINVAL;
}
hl_set_current(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_current(hdev, channel, cpucp_attr, val);
else
hl_set_current(hdev, channel, attr, val);
break;
case hwmon_power:
switch (attr) {
case hwmon_power_reset_history:
cpucp_attr = CPUCP_POWER_RESET_INPUT_HISTORY;
break;
default:
return -EINVAL;
}
if (use_cpucp_enum)
hl_set_power(hdev, channel, cpucp_attr, val);
else
hl_set_power(hdev, channel, attr, val);
break;
default:
return -EINVAL;
......@@ -296,6 +409,15 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
return 0644;
}
break;
case hwmon_power:
switch (attr) {
case hwmon_power_input:
case hwmon_power_input_highest:
return 0444;
case hwmon_power_reset_history:
return 0200;
}
break;
default:
break;
}
......@@ -551,6 +673,60 @@ int hl_set_current(struct hl_device *hdev,
return rc;
}
int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
if (rc)
dev_err(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
return rc;
}
int hl_get_power(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
"Failed to get power of sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
}
return rc;
}
int hl_hwmon_init(struct hl_device *hdev)
{
struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
......
......@@ -141,10 +141,13 @@ static void handle_user_cq(struct hl_device *hdev,
struct hl_user_interrupt *user_cq)
{
struct hl_user_pending_interrupt *pend;
ktime_t now = ktime_get();
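/* Stamp every pending waiter with the interrupt arrival time before
 * completing it, so the wait ioctl can report when the interrupt
 * actually fired.
 */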
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node)
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
pend->fence.timestamp = now;
complete_all(&pend->fence.completion);
}
spin_unlock(&user_cq->wait_list_lock);
}
......
......@@ -501,23 +501,25 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
!is_power_of_2(prop->dram_page_size)) {
unsigned long dram_page_size = prop->dram_page_size;
u64 page_offset_mask;
u64 phys_addr_mask;
u32 bit;
u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr,
page_id, page_start;
u32 page_off;
/*
* find last set bit in page_size to cover all bits of page
* offset. note that 1 has to be added to bit index.
* note that the internal ulong variable is used to avoid
* alignment issue.
* Bit arithmetic cannot be used for non-power-of-two page
* sizes. In addition, since bit arithmetic is not used,
* we cannot ignore the dram base. All of that must be considered.
*/
bit = find_last_bit(&dram_page_size,
sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
page_offset_mask = (BIT_ULL(bit) - 1);
phys_addr_mask = ~page_offset_mask;
*phys_addr = (tmp_phys_addr & phys_addr_mask) |
(virt_addr & page_offset_mask);
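/* The backing page is identified from the physical address returned by
 * the page walk, while the offset within the page is taken from the
 * virtual address; adding dram_base back restores the absolute address.
 */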
dram_page_size = prop->dram_page_size;
dram_base = prop->dram_base_address;
abs_phys_addr = tmp_phys_addr - dram_base;
abs_virt_addr = virt_addr - dram_base;
page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size);
page_start = page_id * dram_page_size;
div_u64_rem(abs_virt_addr, dram_page_size, &page_off);
*phys_addr = page_start + page_off + dram_base;
} else {
/*
* find the correct hop shift field in hl_mmu_properties
......
......@@ -206,12 +206,12 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
if (!hdev->allow_external_soft_reset) {
dev_err(hdev->dev, "Device does not support soft-reset\n");
if (!hdev->allow_inference_soft_reset) {
dev_err(hdev->dev, "Device does not support inference soft-reset\n");
goto out;
}
dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
dev_warn(hdev->dev, "Inference Soft-Reset requested through sysfs\n");
hl_device_reset(hdev, 0);
......
# SPDX-License-Identifier: GPL-2.0-only
HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_security.o \
gaudi/gaudi_coresight.o
......@@ -661,6 +661,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GAUDI_MME_PLL;
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
return 0;
}
......@@ -795,6 +798,7 @@ static int gaudi_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
......@@ -1837,8 +1841,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
gaudi->cpucp_info_get = gaudi_cpucp_info_get;
gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
hdev->asic_specific = gaudi;
/* Create DMA pool for small allocations */
......@@ -2616,7 +2618,7 @@ static void gaudi_init_e2e(struct hl_device *hdev)
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
if (hdev->asic_prop.fw_security_enabled)
return;
......@@ -7932,6 +7934,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0;
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
>> EQ_CTL_EVENT_TYPE_SHIFT);
bool reset_required;
......@@ -7972,6 +7975,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_GIC500:
......@@ -7979,6 +7983,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
......@@ -7989,6 +7994,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
......@@ -8171,9 +8177,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
reset_device:
if (hdev->asic_prop.fw_security_enabled)
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
......@@ -9439,9 +9445,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = gaudi_add_device_attr,
.add_device_attr = hl_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
.set_pll_profile = gaudi_set_pll_profile,
.set_pll_profile = hl_set_pll_profile,
.get_events_stat = gaudi_get_events_stat,
.read_pte = gaudi_read_pte,
.write_pte = gaudi_write_pte,
......@@ -9465,7 +9471,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.halt_coresight = gaudi_halt_coresight,
.ctx_init = gaudi_ctx_init,
.ctx_fini = gaudi_ctx_fini,
.get_clk_rate = gaudi_get_clk_rate,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
......
......@@ -319,7 +319,6 @@ struct gaudi_internal_qman_info {
* the actual number of internal queues because they are not in
* consecutive order.
* @hbm_bar_cur_addr: current address of HBM PCI bar.
* @max_freq_value: current max clk frequency.
* @events: array that holds all event id's
* @events_stat: array that holds histogram of all received events.
* @events_stat_aggregate: same as events_stat but doesn't get cleared on reset
......@@ -345,7 +344,6 @@ struct gaudi_device {
struct gaudi_collective_properties collective_props;
u64 hbm_bar_cur_addr;
u64 max_freq_value;
u32 events[GAUDI_EVENT_SIZE];
u32 events_stat[GAUDI_EVENT_SIZE];
......@@ -359,10 +357,8 @@ void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
int gaudi_debug_coresight(struct hl_device *hdev, void *data);
void gaudi_halt_coresight(struct hl_device *hdev);
int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
#endif /* GAUDIP_H_ */
......@@ -471,6 +471,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GOYA_MME_PLL;
return 0;
}
......@@ -622,6 +624,7 @@ static int goya_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
......@@ -959,7 +962,7 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->supports_soft_reset = true;
hdev->allow_external_soft_reset = true;
hdev->allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->asic_funcs->set_pci_memory_regions(hdev);
......@@ -4829,6 +4832,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, (HL_RESET_HARD |
HL_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
......@@ -5649,7 +5658,7 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = goya_get_clk_rate,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
......
......@@ -235,7 +235,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
u64 goya_get_device_time(struct hl_device *hdev);
......
......@@ -32,37 +32,6 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
}
}
int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
value);
return value;
}
*max_clk = (value / 1000 / 1000);
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
if (value < 0) {
dev_err(hdev->dev,
"Failed to retrieve device current clock %ld\n",
value);
return value;
}
*cur_clk = (value / 1000 / 1000);
return 0;
}
static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......
......@@ -542,11 +542,14 @@ enum cpucp_packet_rc {
*/
enum cpucp_temp_type {
cpucp_temp_input,
cpucp_temp_min = 4,
cpucp_temp_min_hyst,
cpucp_temp_max = 6,
cpucp_temp_max_hyst,
cpucp_temp_crit,
cpucp_temp_crit_hyst,
cpucp_temp_offset = 19,
cpucp_temp_lowest = 21,
cpucp_temp_highest = 22,
cpucp_temp_reset_history = 23
};
......@@ -555,6 +558,7 @@ enum cpucp_in_attributes {
cpucp_in_input,
cpucp_in_min,
cpucp_in_max,
cpucp_in_lowest = 6,
cpucp_in_highest = 7,
cpucp_in_reset_history
};
......@@ -563,6 +567,7 @@ enum cpucp_curr_attributes {
cpucp_curr_input,
cpucp_curr_min,
cpucp_curr_max,
cpucp_curr_lowest = 6,
cpucp_curr_highest = 7,
cpucp_curr_reset_history
};
......@@ -598,6 +603,16 @@ enum cpucp_pll_type_attributes {
cpucp_pll_pci,
};
/*
* cpucp_power_type aligns with hwmon_power_attributes
* defined in Linux kernel hwmon.h file
*/
enum cpucp_power_type {
CPUCP_POWER_INPUT = 8,
CPUCP_POWER_INPUT_HIGHEST = 9,
CPUCP_POWER_RESET_INPUT_HISTORY = 11
};
/*
* MSI type enumeration table for all ASICs and future SW versions.
* For future ASIC-LKD compatibility, we can only add new enumerations.
......@@ -731,6 +746,9 @@ struct cpucp_security_info {
* @pll_map: Bit map of supported PLLs for current ASIC version.
* @mme_binning_mask: MME binning mask,
* (0 = functional, 1 = binned)
* @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
(0 = functional, 1 = binned)
* @memory_repair_flag: eFuse flag indicating memory repair
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
......@@ -749,7 +767,9 @@ struct cpucp_info {
__le64 reserved3;
__le64 reserved4;
__u8 reserved5;
__u8 pad[7];
__u8 dram_binning_mask;
__u8 memory_repair_flag;
__u8 pad[5];
struct cpucp_security_info sec_info;
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
......
......@@ -8,8 +8,6 @@
#ifndef GAUDI_FW_IF_H
#define GAUDI_FW_IF_H
#include <linux/types.h>
#define GAUDI_EVENT_QUEUE_MSI_IDX 8
#define GAUDI_NIC_PORT1_MSI_IDX 10
#define GAUDI_NIC_PORT3_MSI_IDX 12
......@@ -78,13 +76,13 @@ struct gaudi_nic_status {
__u32 high_ber_cnt;
};
struct gaudi_flops_2_data {
struct gaudi_cold_rst_data {
union {
struct {
__u32 spsram_init_done : 1;
__u32 reserved : 31;
u32 spsram_init_done : 1;
u32 reserved : 31;
};
__u32 data;
__le32 data;
};
};
......
......@@ -33,6 +33,7 @@
#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmPREBOOT_PCIE_EN mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_1
#define mmCOLD_RST_DATA mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_2
#define mmUPD_PENDING_STS mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_3
#endif /* GAUDI_REG_MAP_H_ */
......@@ -272,6 +272,16 @@ enum hl_gaudi_pll_index {
HL_GAUDI_PLL_MAX
};
/**
* enum hl_device_status - Device status information.
* @HL_DEVICE_STATUS_OPERATIONAL: Device is operational.
* @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset.
* @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable.
* @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
* @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
* progress.
* @HL_DEVICE_STATUS_LAST: Last status.
*/
enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
......@@ -556,33 +566,30 @@ enum gaudi_dcores {
HL_GAUDI_ES_DCORE
};
/**
* struct hl_info_args - Main structure to retrieve device related information.
* @return_pointer: User space address of the relevant structure related to HL_INFO_* operation
* mentioned in @op.
* @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it
* limits how many bytes the kernel can write. For hw_events array, the size should be
* hl_info_hw_ip_info.num_of_events * sizeof(__u32).
* @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details.
* @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores).
* @ctx_id: Context ID of the user. Currently not in use.
* @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
/*
* The size of the return value. Just like "size" in "snprintf",
* it limits how many bytes the kernel can write
*
* For hw_events array, the size should be
* hl_info_hw_ip_info.num_of_events * sizeof(__u32)
*/
__u32 return_size;
/* HL_INFO_* */
__u32 op;
union {
/* Dcore id for which the information is relevant.
* For Gaudi refer to 'enum gaudi_dcores'
*/
__u32 dcore_id;
/* Context ID - Currently not in use */
__u32 ctx_id;
/* Period value for utilization rate (100ms - 1000ms, in 100ms
* resolution.
*/
__u32 period_ms;
/* PLL frequency retrieval */
__u32 pll_index;
};
......@@ -890,11 +897,7 @@ struct hl_wait_cs_in {
*/
__u64 addr;
/* Target value for completion comparison */
__u32 target;
/* Absolute timeout to wait for interrupt
* in microseconds
*/
__u32 interrupt_timeout_us;
__u64 target;
};
};
......@@ -910,7 +913,12 @@ struct hl_wait_cs_in {
/* Multi CS API info- valid entries in multi-CS array */
__u8 seq_arr_len;
__u8 pad[7];
__u8 pad[3];
/* Absolute timeout to wait for an interrupt in microseconds.
* Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
*/
__u32 interrupt_timeout_us;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
......@@ -952,6 +960,10 @@ union hl_wait_cs_args {
#define HL_MEM_OP_UNMAP 3
/* Opcode to map a hw block */
#define HL_MEM_OP_MAP_BLOCK 4
/* Opcode to create a DMA-BUF object for an existing device memory allocation
* and to export an FD of that DMA-BUF back to the caller
*/
#define HL_MEM_OP_EXPORT_DMABUF_FD 5
/* Memory flags */
#define HL_MEM_CONTIGUOUS 0x1
......@@ -1023,11 +1035,26 @@ struct hl_mem_in {
/* Virtual address returned from HL_MEM_OP_MAP */
__u64 device_virt_addr;
} unmap;
/* HL_MEM_OP_EXPORT_DMABUF_FD */
struct {
/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
* where we don't have an MMU for the device memory, the
* driver expects a physical address (instead of
* a handle) in the device memory space.
*/
__u64 handle;
/* Size of memory allocation. Relevant only for GAUDI */
__u64 mem_size;
} export_dmabuf_fd;
};
/* HL_MEM_OP_* */
__u32 op;
/* HL_MEM_* flags */
/* HL_MEM_* flags.
* For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
* DMA-BUF file/FD flags.
*/
__u32 flags;
/* Context ID - Currently not in use */
__u32 ctx_id;
......@@ -1064,6 +1091,13 @@ struct hl_mem_out {
__u32 pad;
};
/* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the
* DMA-BUF object that was created to describe a memory
* allocation on the device's memory space. The FD should be
* passed to the importer driver
*/
__s32 fd;
};
};
......