Commit b9040c99 authored by Oded Gabbay

habanalabs: fix endianness handling for internal QMAN submission

The PQs of internal H/W queues (QMANs) can be located in different memory
areas for different ASICs. Therefore, when writing PQEs, we need to use
the correct function according to the location of the PQ. For example, if
the PQ is located in the device's memory (SRAM or DRAM), we need to use
memcpy_toio() so that the write works on architectures that have separate
address ranges for IO memory.

This patch makes the code that writes the PQE ASIC-specific so that we
can handle this properly per ASIC.
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Tested-by: Ben Segal <bpsegal20@gmail.com>
parent 4e87334a
...@@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) ...@@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
GOYA_ASYNC_EVENT_ID_PI_UPDATE); GOYA_ASYNC_EVENT_ID_PI_UPDATE);
} }
void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val) void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{ {
/* Not needed in Goya */ /* The QMANs are on the SRAM so need to copy to IO space */
memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
} }
static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size, static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
...@@ -5048,7 +5049,7 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5048,7 +5049,7 @@ static const struct hl_asic_funcs goya_funcs = {
.resume = goya_resume, .resume = goya_resume,
.cb_mmap = goya_cb_mmap, .cb_mmap = goya_cb_mmap,
.ring_doorbell = goya_ring_doorbell, .ring_doorbell = goya_ring_doorbell,
.flush_pq_write = goya_flush_pq_write, .pqe_write = goya_pqe_write,
.asic_dma_alloc_coherent = goya_dma_alloc_coherent, .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
.asic_dma_free_coherent = goya_dma_free_coherent, .asic_dma_free_coherent = goya_dma_free_coherent,
.get_int_queue_base = goya_get_int_queue_base, .get_int_queue_base = goya_get_int_queue_base,
......
...@@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev); ...@@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev);
void goya_late_fini(struct hl_device *hdev); void goya_late_fini(struct hl_device *hdev);
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi); void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val); void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd);
void goya_update_eq_ci(struct hl_device *hdev, u32 val); void goya_update_eq_ci(struct hl_device *hdev, u32 val);
void goya_restore_phase_topology(struct hl_device *hdev); void goya_restore_phase_topology(struct hl_device *hdev);
int goya_context_switch(struct hl_device *hdev, u32 asid); int goya_context_switch(struct hl_device *hdev, u32 asid);
......
...@@ -441,7 +441,11 @@ enum hl_pll_frequency { ...@@ -441,7 +441,11 @@ enum hl_pll_frequency {
* @resume: handles IP specific H/W or SW changes for resume. * @resume: handles IP specific H/W or SW changes for resume.
* @cb_mmap: maps a CB. * @cb_mmap: maps a CB.
* @ring_doorbell: increment PI on a given QMAN. * @ring_doorbell: increment PI on a given QMAN.
* @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed. * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
* function because the PQs are located in different memory areas
* per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
* writing the PQE must match the destination memory area
* properties.
* @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
* dma_alloc_coherent(). This is ASIC function because * dma_alloc_coherent(). This is ASIC function because
* its implementation is not trivial when the driver * its implementation is not trivial when the driver
...@@ -510,7 +514,8 @@ struct hl_asic_funcs { ...@@ -510,7 +514,8 @@ struct hl_asic_funcs {
int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u64 kaddress, phys_addr_t paddress, u32 size); u64 kaddress, phys_addr_t paddress, u32 size);
void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val); void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
struct hl_bd *bd);
void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, gfp_t flag); dma_addr_t *dma_handle, gfp_t flag);
void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
......
...@@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) ...@@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
struct hl_device *hdev = job->cs->ctx->hdev; struct hl_device *hdev = job->cs->ctx->hdev;
struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
struct hl_bd bd; struct hl_bd bd;
u64 *pi, *pbd = (u64 *) &bd; __le64 *pi;
bd.ctl = 0; bd.ctl = 0;
bd.len = __cpu_to_le32(job->job_cb_size); bd.len = cpu_to_le32(job->job_cb_size);
bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb); bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
pi = (u64 *) (uintptr_t) (q->kernel_address + pi = (__le64 *) (uintptr_t) (q->kernel_address +
((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
pi[0] = pbd[0];
pi[1] = pbd[1];
q->pi++; q->pi++;
q->pi &= ((q->int_queue_len << 1) - 1); q->pi &= ((q->int_queue_len << 1) - 1);
/* Flush PQ entry write. Relevant only for specific ASICs */ hdev->asic_funcs->pqe_write(hdev, pi, &bd);
hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment