Commit 56e28775 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-fixes-2020-06-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-linus

Oded writes:

This tag contains the following fixes for kernel 5.8-rc2:

- Close a security hole in GAUDI command buffer parsing by blocking an
  instruction that might allow a user to run a command buffer that wasn't
  parsed on a secured engine.

- Fix bug in GAUDI MMU cache invalidation code.

- Rename a function to resolve a conflict with a static inline function in
  arch/m68k/include/asm/mcfmmu.h.

- Increase the watchdog timeout of the GAUDI QMAN arbitration H/W to prevent
  false timeout reports.

- Fix a NULL pointer dereference that occurred when an error happened during
  command submission.

- Increase the H/W timer used when checking if the PDMA engine in GAUDI is IDLE.

* tag 'misc-habanalabs-fixes-2020-06-24' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: increase h/w timer when checking idle
  habanalabs: Correct handling when failing to enqueue CB
  habanalabs: increase GAUDI QMAN ARB WDT timeout
  habanalabs: rename mmu_write() to mmu_asid_va_write()
  habanalabs: use PI in MMU cache invalidation
  habanalabs: block scalar load_and_exe on external queue
parents 8c289ea0 ce04326e
@@ -62,6 +62,12 @@ static void hl_fence_release(struct dma_fence *fence)
 		container_of(fence, struct hl_cs_compl, base_fence);
 	struct hl_device *hdev = hl_cs_cmpl->hdev;
 
+	/* EBUSY means the CS was never submitted and hence we don't have
+	 * an attached hw_sob object that we should handle here
+	 */
+	if (fence->error == -EBUSY)
+		goto free;
+
 	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
 			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {
@@ -92,6 +98,7 @@ static void hl_fence_release(struct dma_fence *fence)
 		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
 	}
 
+free:
 	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
 }
@@ -328,10 +335,16 @@ static void cs_do_release(struct kref *ref)
 	hl_ctx_put(cs->ctx);
 
+	/* We need to mark an error for not submitted because in that case
+	 * the dma fence release flow is different. Mainly, we don't need
+	 * to handle hw_sob for signal/wait
+	 */
 	if (cs->timedout)
 		dma_fence_set_error(cs->fence, -ETIMEDOUT);
 	else if (cs->aborted)
 		dma_fence_set_error(cs->fence, -EIO);
+	else if (!cs->submitted)
+		dma_fence_set_error(cs->fence, -EBUSY);
 
 	dma_fence_signal(cs->fence);
 	dma_fence_put(cs->fence);
...
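Taken together, the two command-submission hunks above give the fence-release path an early-out for a CS that was never submitted. A condensed sketch of the resulting flow, assembled only from lines visible in this diff (the body of the signal/wait branch is elided):

	static void hl_fence_release(struct dma_fence *fence)
	{
		struct hl_cs_compl *hl_cs_cmpl =
			container_of(fence, struct hl_cs_compl, base_fence);

		/* cs_do_release() marks a never-submitted CS with -EBUSY,
		 * so no hw_sob was ever attached to it.
		 */
		if (fence->error == -EBUSY)
			goto free;

		if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
				(hl_cs_cmpl->type == CS_TYPE_WAIT)) {
			/* ... hw_sob handling elided ... */
			kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
		}

	free:
		kfree_rcu(hl_cs_cmpl, base_fence.rcu);
	}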
@@ -480,7 +480,7 @@ static int mmu_show(struct seq_file *s, void *data)
 	return 0;
 }
 
-static ssize_t mmu_write(struct file *file, const char __user *buf,
+static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
 		size_t count, loff_t *f_pos)
 {
 	struct seq_file *s = file->private_data;
@@ -1125,7 +1125,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_submission_jobs", command_submission_jobs_show, NULL},
 	{"userptr", userptr_show, NULL},
 	{"vm", vm_show, NULL},
-	{"mmu", mmu_show, mmu_write},
+	{"mmu", mmu_show, mmu_asid_va_write},
 	{"engines", engines_show, NULL}
 };
...
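For context on the rename: arch/m68k/include/asm/mcfmmu.h provides a static inline named mmu_write(), so a driver file that both pulls in that header (e.g. in cross-arch compile-test builds) and defines its own mmu_write() fails to compile. A minimal illustration of that kind of collision, using hypothetical file names rather than the actual kernel sources:

	/* colliding_header.h -- stands in for the m68k header */
	static inline void mmu_write(unsigned int reg, unsigned int val) { }

	/* driver.c -- stands in for debugfs.c before the rename */
	#include "colliding_header.h"

	static long mmu_write(int fd, const char *buf, unsigned long count)
	{
		/* compile error: conflicting types for 'mmu_write',
		 * clashing with the static inline in the header
		 */
		return 0;
	}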
@@ -96,7 +96,7 @@
 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
 
-#define GAUDI_ARB_WDT_TIMEOUT		0x400000
+#define GAUDI_ARB_WDT_TIMEOUT		0x1000000
 
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
@@ -1893,6 +1893,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
 
+	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
+
 	/* The following configuration is needed only once per QMAN */
 	if (qman_id == 0) {
 		/* Configure RAZWI IRQ */
@@ -2725,6 +2727,12 @@ static int gaudi_mmu_init(struct hl_device *hdev)
 	WREG32(mmSTLB_HOP_CONFIGURATION,
 			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
 
+	/*
+	 * The H/W expects the first PI after init to be 1. After wraparound
+	 * we'll write 0.
+	 */
+	gaudi->mmu_cache_inv_pi = 1;
+
 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
 
 	return 0;
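Because mmu_cache_inv_pi is a u8 (see the gaudiP.h hunk further down), the wraparound described in the comment above falls out of plain unsigned arithmetic. A standalone demonstration of the sequence of PI values the H/W would be handed, assuming only what the comment and the invalidation hunk below state:

	#include <stdio.h>

	int main(void)
	{
		unsigned char pi = 1;	/* first PI after init is 1 */

		/* The driver writes the current value and then increments
		 * (mmu_cache_inv_pi++), so the device sees 1, 2, ..., 255,
		 * then 0 after the wraparound, then 1 again.
		 */
		for (int i = 0; i < 257; i++)
			printf("%u\n", pi++);

		return 0;
	}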
@@ -3790,6 +3798,25 @@ static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
 						src_in_host);
 }
 
+static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_load_and_exe *user_pkt)
+{
+	u32 cfg;
+
+	cfg = le32_to_cpu(user_pkt->cfg);
+
+	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
+		dev_err(hdev->dev,
+			"User not allowed to use Load and Execute\n");
+		return -EPERM;
+	}
+
+	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
+
+	return 0;
+}
+
 static int gaudi_validate_cb(struct hl_device *hdev,
 			struct hl_cs_parser *parser, bool is_mmu)
 {
@@ -3838,6 +3865,11 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 			rc = -EPERM;
 			break;
 
+		case PACKET_LOAD_AND_EXE:
+			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
+				(struct packet_load_and_exe *) user_pkt);
+			break;
+
 		case PACKET_LIN_DMA:
 			parser->contains_dma_pkt = true;
 			if (is_mmu)
@@ -3855,7 +3887,6 @@ static int gaudi_validate_cb(struct hl_device *hdev,
 		case PACKET_FENCE:
 		case PACKET_NOP:
 		case PACKET_ARB_POINT:
-		case PACKET_LOAD_AND_EXE:
 			parser->patched_cb_size += pkt_size;
 			break;
@@ -5994,6 +6025,8 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 	mutex_lock(&hdev->mmu_cache_lock);
 
 	/* L0 & L1 invalidation */
+	WREG32(mmSTLB_INV_PS, 3);
+	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
 	WREG32(mmSTLB_INV_PS, 2);
 
 	rc = hl_poll_timeout(
...
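Condensed, the invalidation hunk above turns the L0/L1 flush into a producer-index handshake: write INV_PS, publish the next PI, write INV_PS again, then poll. A sketch of the sequence using only the writes visible in the diff (the meaning of the INV_PS values and the hl_poll_timeout() arguments are not shown there, so they are left elided):

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);				/* added by this fix */
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);	/* added: publish PI */
	WREG32(mmSTLB_INV_PS, 2);				/* pre-existing write */

	rc = hl_poll_timeout(/* ... arguments elided in this diff ... */);

	/* assumed counterpart of the lock above; not shown in the hunk */
	mutex_unlock(&hdev->mmu_cache_lock);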
@@ -229,6 +229,8 @@ struct gaudi_internal_qman_info {
  * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
  *                  Multi MSI is possible only with IOMMU enabled.
  * @ext_queue_idx: helper index for external queues initialization.
+ * @mmu_cache_inv_pi: PI for MMU cache invalidation flow. The H/W expects an
+ *                    8-bit value so use u8.
  */
 struct gaudi_device {
 	int (*armcp_info_get)(struct hl_device *hdev);
@@ -248,6 +250,7 @@ struct gaudi_device {
 	u32 hw_cap_initialized;
 	u8 multi_msi_mode;
 	u8 ext_queue_idx;
+	u8 mmu_cache_inv_pi;
 };
 
 void gaudi_init_security(struct hl_device *hdev);
...
@@ -197,6 +197,9 @@ struct packet_wait {
 	__le32 ctl;
 };
 
+#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_SHIFT	0
+#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK	0x00000001
+
 struct packet_load_and_exe {
 	__le32 cfg;
 	__le32 ctl;
...
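These two defines close out the LOAD_AND_EXE fix: gaudi_validate_load_and_exe_pkt(), added in the gaudi.c hunk above, rejects any packet whose cfg has the DST bit set, which per the shortlog corresponds to scalar load-and-execute. A self-contained host-side sketch of the same predicate, taking an already byte-swapped cfg (the driver itself goes through le32_to_cpu() first):

	#include <stdint.h>

	#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_SHIFT	0
	#define GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK	0x00000001

	/* Nonzero when the parser would return -EPERM for this packet. */
	static int load_and_exe_blocked(uint32_t cfg)
	{
		return (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) != 0;
	}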