Commit 16102736 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2019-05-03' of...

Merge tag 'misc-habanalabs-next-2019-05-03' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains further changes for kernel 5.2.

The changes are either bug fixes or simple re-factoring of existing code.
The notable changes are:

- Add missing fields in the bmon structure that is passed in the debug
  IOCTL when the user wants to configure the bus monitor.

- Use the dedicated device-CPU accessible memory pool for all host memory
  allocations that are accessible directly by the embedded CPU. This is
  needed to enforce certain restrictions we have due to the embedded CPU's
  architecture.

- Manipulate DMA addresses only inside ASIC-specific files. This is needed
  to better support code for future ASICs.

Other minor changes include:

- Move pr_fmt() to C files to avoid a dependency on include order.

- Remove the call to the CS parsing function for workloads that originate
  from the driver, and remove the dead code that results from this change.

- Update names of structure members and labels to better reflect their
  usage.

- When moving the dram PCI bar aperture, return the old aperture address
  range instead of an error code. This will allow us to restore the old
  address range in a simpler fashion.

* tag 'misc-habanalabs-next-2019-05-03' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: Update CPU DMA memory label name
  habanalabs: Update CPU DMA pool label name
  habanalabs: increase timeout if working with simulator
  habanalabs: remove condition that is always true
  habanalabs: remove redundant member from parser struct
  habanalabs: Manipulate DMA addresses in ASIC functions
  habanalabs: rename functions to improve code readability
  habanalabs: remove call to cs_parser()
  habanalabs: Use single pool for CPU accessible host memory
  habanalabs: return old dram bar address upon change
  habanalabs: rename restore to ctx_switch when appropriate
  habanalabs: use ASIC functions interface for rreg/wreg
  uapi/habanalabs: add missing fields in bmon params
  habanalabs: re-factor goya_parse_cb_no_ext_queue()
  habanalabs: Cancel pr_fmt() definition dependency on includes order
parents 78e6427b 9f832fda
......@@ -13,7 +13,7 @@
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
hdev->asic_funcs->dma_free_coherent(hdev, cb->size,
hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
(void *) (uintptr_t) cb->kernel_address,
cb->bus_address);
kfree(cb);
......@@ -66,10 +66,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
return NULL;
if (ctx_id == HL_KERNEL_ASID_ID)
p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
else
p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
GFP_USER | __GFP_ZERO);
if (!p) {
......
......@@ -93,7 +93,6 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.user_cb_size = job->user_cb_size;
parser.ext_queue = job->ext_queue;
job->patched_cb = NULL;
parser.use_virt_addr = hdev->mmu_enable;
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (job->ext_queue) {
......@@ -601,7 +600,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
void __user *chunks;
u32 num_chunks;
u64 cs_seq = ULONG_MAX;
int rc, do_restore;
int rc, do_ctx_switch;
bool need_soft_reset = false;
if (hl_device_disabled_or_in_reset(hdev)) {
......@@ -612,9 +611,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
goto out;
}
do_restore = atomic_cmpxchg(&ctx->thread_restore_token, 1, 0);
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_restore || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
long ret;
chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
......@@ -622,7 +621,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
mutex_lock(&hpriv->restore_phase_mutex);
if (do_restore) {
if (do_ctx_switch) {
rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
if (rc) {
dev_err_ratelimited(hdev->dev,
......@@ -678,18 +677,18 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
}
}
ctx->thread_restore_wait_token = 1;
} else if (!ctx->thread_restore_wait_token) {
ctx->thread_ctx_switch_wait_token = 1;
} else if (!ctx->thread_ctx_switch_wait_token) {
u32 tmp;
rc = hl_poll_timeout_memory(hdev,
(u64) (uintptr_t) &ctx->thread_restore_wait_token,
(u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
jiffies_to_usecs(hdev->timeout_jiffies),
&tmp);
if (rc || !tmp) {
dev_err(hdev->dev,
"restore phase hasn't finished in time\n");
"context switch phase didn't finish in time\n");
rc = -ETIMEDOUT;
goto out;
}
......
......@@ -106,8 +106,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
ctx->cs_sequence = 1;
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_restore_token, 1);
ctx->thread_restore_wait_token = 0;
atomic_set(&ctx->thread_ctx_switch_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
......
......@@ -5,6 +5,8 @@
* All Rights Reserved.
*/
#define pr_fmt(fmt) "habanalabs: " fmt
#include "habanalabs.h"
#include <linux/pci.h>
......@@ -708,10 +710,10 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_reset(hdev, &hdev->completion_queue[i]);
/* Make sure the setup phase for the user context will run again */
/* Make sure the context switch phase will run again */
if (hdev->user_ctx) {
atomic_set(&hdev->user_ctx->thread_restore_token, 1);
hdev->user_ctx->thread_restore_wait_token = 0;
atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1);
hdev->user_ctx->thread_ctx_switch_wait_token = 0;
}
/* Finished tear-down, starting to re-initialize */
......@@ -1145,7 +1147,13 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
* either by the direct access of the device or by another core
*/
u32 *paddr = (u32 *) (uintptr_t) addr;
ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
ktime_t timeout;
/* timeout should be longer when working with simulator */
if (!hdev->pdev)
timeout_us *= 10;
timeout = ktime_add_us(ktime_get(), timeout_us);
might_sleep();
......
......@@ -249,8 +249,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.addr = cpu_to_le64(armcp_info_dma_addr +
prop->host_phys_base_address);
pkt.addr = cpu_to_le64(armcp_info_dma_addr);
pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
......@@ -281,7 +280,6 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct armcp_packet pkt = {};
void *eeprom_info_cpu_addr;
dma_addr_t eeprom_info_dma_addr;
......@@ -301,8 +299,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.addr = cpu_to_le64(eeprom_info_dma_addr +
prop->host_phys_base_address);
pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
pkt.data_max_size = cpu_to_le32(max_size);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
......
This diff is collapsed.
......@@ -147,9 +147,6 @@ enum goya_fw_component {
};
struct goya_device {
void (*mmu_prepare_reg)(struct hl_device *hdev, u64 reg, u32 asid);
void (*qman0_set_security)(struct hl_device *hdev, bool secure);
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
......@@ -162,13 +159,34 @@ struct goya_device {
u32 hw_cap_initialized;
};
void goya_get_fixed_properties(struct hl_device *hdev);
int goya_mmu_init(struct hl_device *hdev);
void goya_init_dma_qmans(struct hl_device *hdev);
void goya_init_mme_qmans(struct hl_device *hdev);
void goya_init_tpc_qmans(struct hl_device *hdev);
int goya_init_cpu_queues(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
int goya_late_init(struct hl_device *hdev);
void goya_late_fini(struct hl_device *hdev);
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
void goya_update_eq_ci(struct hl_device *hdev, u32 val);
void goya_restore_phase_topology(struct hl_device *hdev);
int goya_context_switch(struct hl_device *hdev, u32 asid);
int goya_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus,
u8 i2c_addr, u8 i2c_reg, u32 *val);
int goya_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus,
u8 i2c_addr, u8 i2c_reg, u32 val);
void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state);
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id);
int goya_test_queues(struct hl_device *hdev);
int goya_test_cpu_queue(struct hl_device *hdev);
int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
u32 timeout, long *result);
long goya_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
......@@ -176,33 +194,31 @@ long goya_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
long goya_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
void goya_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value);
void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state);
u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int goya_armcp_info_get(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, void *data);
u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
int goya_test_queues(struct hl_device *hdev);
void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
int goya_mmu_clear_pgt_range(struct hl_device *hdev);
int goya_mmu_set_dram_default_page(struct hl_device *hdev);
void goya_late_fini(struct hl_device *hdev);
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
u32 cq_val, u32 msix_vec);
int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len);
dma_addr_t *dma_handle, u16 *queue_len);
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt);
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id);
int goya_send_heartbeat(struct hl_device *hdev);
void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle);
......
......@@ -459,10 +459,14 @@ static int goya_config_bmon(struct hl_device *hdev,
if (!input)
return -EINVAL;
WREG32(base_reg + 0x208, lower_32_bits(input->addr_range0));
WREG32(base_reg + 0x20C, upper_32_bits(input->addr_range0));
WREG32(base_reg + 0x248, lower_32_bits(input->addr_range1));
WREG32(base_reg + 0x24C, upper_32_bits(input->addr_range1));
WREG32(base_reg + 0x200, lower_32_bits(input->start_addr0));
WREG32(base_reg + 0x204, upper_32_bits(input->start_addr0));
WREG32(base_reg + 0x208, lower_32_bits(input->addr_mask0));
WREG32(base_reg + 0x20C, upper_32_bits(input->addr_mask0));
WREG32(base_reg + 0x240, lower_32_bits(input->start_addr1));
WREG32(base_reg + 0x244, upper_32_bits(input->start_addr1));
WREG32(base_reg + 0x248, lower_32_bits(input->addr_mask1));
WREG32(base_reg + 0x24C, upper_32_bits(input->addr_mask1));
WREG32(base_reg + 0x224, 0);
WREG32(base_reg + 0x234, 0);
WREG32(base_reg + 0x30C, input->bw_win);
......@@ -482,8 +486,12 @@ static int goya_config_bmon(struct hl_device *hdev,
WREG32(base_reg + 0x100, 0x11);
WREG32(base_reg + 0x304, 0x1);
} else {
WREG32(base_reg + 0x200, 0);
WREG32(base_reg + 0x204, 0);
WREG32(base_reg + 0x208, 0xFFFFFFFF);
WREG32(base_reg + 0x20C, 0xFFFFFFFF);
WREG32(base_reg + 0x240, 0);
WREG32(base_reg + 0x244, 0);
WREG32(base_reg + 0x248, 0xFFFFFFFF);
WREG32(base_reg + 0x24C, 0xFFFFFFFF);
WREG32(base_reg + 0x224, 0xFFFFFFFF);
......
This diff is collapsed.
......@@ -6,6 +6,8 @@
*
*/
#define pr_fmt(fmt) "habanalabs: " fmt
#include "habanalabs.h"
#include <linux/pci.h>
......
......@@ -82,7 +82,7 @@ static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd += hl_pi_2_offset(q->pi);
bd->ctl = __cpu_to_le32(ctl);
bd->len = __cpu_to_le32(len);
bd->ptr = __cpu_to_le64(ptr + hdev->asic_prop.host_phys_base_address);
bd->ptr = __cpu_to_le64(ptr);
q->pi = hl_queue_inc_ptr(q->pi);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
......@@ -263,9 +263,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
* checked in hl_queue_sanity_checks
*/
cq = &hdev->completion_queue[q->hw_queue_id];
cq_addr = cq->bus_address +
hdev->asic_prop.host_phys_base_address;
cq_addr += cq->pi * sizeof(struct hl_cq_entry);
cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
cq_addr,
......@@ -415,14 +413,20 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
}
static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
struct hl_hw_queue *q)
struct hl_hw_queue *q, bool is_cpu_queue)
{
void *p;
int rc;
p = hdev->asic_funcs->dma_alloc_coherent(hdev,
HL_QUEUE_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
if (is_cpu_queue)
p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
HL_QUEUE_SIZE_IN_BYTES,
&q->bus_address);
else
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
HL_QUEUE_SIZE_IN_BYTES,
&q->bus_address,
GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
......@@ -446,8 +450,15 @@ static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
return 0;
free_queue:
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
if (is_cpu_queue)
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address);
else
hdev->asic_funcs->asic_dma_free_coherent(hdev,
HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address,
q->bus_address);
return rc;
}
......@@ -474,12 +485,12 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
return ext_and_cpu_hw_queue_init(hdev, q);
return ext_and_cpu_hw_queue_init(hdev, q, true);
}
static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
return ext_and_cpu_hw_queue_init(hdev, q);
return ext_and_cpu_hw_queue_init(hdev, q, false);
}
/*
......@@ -569,8 +580,15 @@ static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
kfree(q->shadow_queue);
hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
if (q->queue_type == QUEUE_TYPE_CPU)
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address);
else
hdev->asic_funcs->asic_dma_free_coherent(hdev,
HL_QUEUE_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address,
q->bus_address);
}
int hl_hw_queues_create(struct hl_device *hdev)
......
......@@ -300,14 +300,6 @@ enum armcp_pwm_attributes {
armcp_pwm_enable
};
#define HL_CPU_PKT_SHIFT 5
#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
#define HL_CPU_MAX_PKTS_IN_CB 32
#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
HL_CPU_MAX_PKTS_IN_CB)
#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
/* Event Queue Packets */
struct eq_generic_event {
......
......@@ -222,7 +222,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
......@@ -248,7 +248,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
*/
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
{
hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
}
......@@ -284,8 +284,9 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
HL_EQ_SIZE_IN_BYTES,
&q->bus_address);
if (!p)
return -ENOMEM;
......@@ -308,8 +309,9 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
flush_workqueue(hdev->eq_wq);
hdev->asic_funcs->dma_free_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
HL_EQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address);
}
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
......
......@@ -759,10 +759,6 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
/* For accessing the host we need to turn on bit 39 */
if (phys_pg_pack->created_from_userptr)
paddr += hdev->asic_prop.host_phys_base_address;
rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
if (rc) {
dev_err(hdev->dev,
......
......@@ -236,6 +236,8 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
* @hdev: Pointer to hl_device structure.
* @sram_base_address: SRAM base address.
* @dram_base_address: DRAM base address.
* @host_phys_base_address: Base physical address of host memory for device
* transactions.
* @host_phys_size: Size of host memory for device transactions.
*
* This is needed in case the firmware doesn't initialize the iATU.
......@@ -243,7 +245,8 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
* Return: 0 on success, negative value for failure.
*/
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
u64 dram_base_address, u64 host_phys_size)
u64 dram_base_address, u64 host_phys_base_address,
u64 host_phys_size)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 host_phys_end_addr;
......@@ -259,14 +262,17 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
/* Point to DRAM */
if (!hdev->asic_funcs->set_dram_bar_base)
return -EINVAL;
rc |= hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address);
if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
U64_MAX)
return -EIO;
/* Outbound Region 0 - Point to Host */
host_phys_end_addr = prop->host_phys_base_address + host_phys_size - 1;
host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
rc |= hl_pci_iatu_write(hdev, 0x008,
lower_32_bits(prop->host_phys_base_address));
lower_32_bits(host_phys_base_address));
rc |= hl_pci_iatu_write(hdev, 0x00C,
upper_32_bits(prop->host_phys_base_address));
upper_32_bits(host_phys_base_address));
rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
rc |= hl_pci_iatu_write(hdev, 0x014, 0);
rc |= hl_pci_iatu_write(hdev, 0x018, 0);
......
......@@ -374,9 +374,12 @@ struct hl_debug_params_stm {
};
struct hl_debug_params_bmon {
/* Transaction address filter */
__u64 addr_range0;
__u64 addr_range1;
/* Two address ranges that the user can request to filter */
__u64 start_addr0;
__u64 addr_mask0;
__u64 start_addr1;
__u64 addr_mask1;
/* Capture window configuration */
__u32 bw_win;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment