Commit c40bd7d9 authored by Dave Jiang, committed by Vinod Koul

dmaengine: idxd: process user page faults for completion record

DSA supports page fault handling through PRS. However, the DMA engine
that's processing the descriptor is blocked until the PRS response is
received. Other workqueues sharing the engine are also blocked.
Page fault handling by the driver with PRS disabled can be used to
mitigate the stalling.

With PRS disabled while ATS remains enabled, DSA handles a page fault on
a completion record by reporting an event in the event log. In this
case, the descriptor is completed and the event log entry contains the
completion record address and the contents of the completion record. Add
support to the event log handling code to fault in the completion record
and copy its contents to user memory.
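
For illustration only, not part of this patch: a minimal sketch of how a
completion record could be written into user memory from a workqueue worker,
assuming the mm of the process that owns the PASID has already been looked up.
The idxd_copy_cr() helper called from irq.c below is introduced elsewhere in
this series; copy_cr_sketch() and its mm argument are hypothetical stand-ins.

	/*
	 * Sketch only; assumes <linux/kthread.h>, <linux/sched/mm.h> and
	 * <linux/uaccess.h>.  'mm' is the mm of the PASID's owning process;
	 * finding it is outside this sketch.
	 */
	static int copy_cr_sketch(struct mm_struct *mm, u64 cr_uaddr, void *cr, int len)
	{
		unsigned long left;

		if (!mmget_not_zero(mm))	/* owner may already be exiting */
			return 0;

		kthread_use_mm(mm);		/* adopt the user mm in this kworker */
		left = copy_to_user(u64_to_user_ptr(cr_uaddr), cr, len);
		kthread_unuse_mm(mm);
		mmput(mm);

		return len - left;		/* bytes actually written */
	}

A short copy here cannot be turned into a SIGSEGV because the faulting thread
is not known at this point, which is why the handler below only logs the
failure and lets the application time out.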

A bitmap is introduced to track discarded event log entries. When the user
process calls ->release() on the char device, it is no longer interested in
any remaining event log entries tied to the relevant wq and PASID, so the
driver marks the index of each such entry in the bitmap. When the event log
handler later encounters a marked entry, it simply clears the bitmap bit and
skips the entry instead of processing it.
Tested-by: Tony Zhu <tony.zhu@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Link: https://lore.kernel.org/r/20230407203143.2189681-10-fenghua.yu@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
parent b022f597
@@ -164,6 +164,35 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
 	return rc;
 }
 
+static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_evl *evl = idxd->evl;
+	union evl_status_reg status;
+	u16 h, t, size;
+	int ent_size = evl_ent_size(idxd);
+	struct __evl_entry *entry_head;
+
+	if (!evl)
+		return;
+
+	spin_lock(&evl->lock);
+	status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+	t = status.tail;
+	h = evl->head;
+	size = evl->size;
+
+	while (h != t) {
+		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
+		if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id)
+			set_bit(h, evl->bmap);
+		h = (h + 1) % size;
+	}
+	spin_unlock(&evl->lock);
+
+	drain_workqueue(wq->wq);
+}
+
 static int idxd_cdev_release(struct inode *node, struct file *filep)
 {
 	struct idxd_user_context *ctx = filep->private_data;
@@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
 	}
 
 	if (ctx->sva) {
+		idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
 		iommu_sva_unbind_device(ctx->sva);
 		idxd_xa_pasid_remove(ctx);
 	}
......
@@ -762,18 +762,29 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
 	dma_addr_t dma_addr;
 	int size;
 	struct idxd_evl *evl = idxd->evl;
+	unsigned long *bmap;
+	int rc;
 
 	if (!evl)
 		return 0;
 
 	size = evl_size(idxd);
+
+	bmap = bitmap_zalloc(size, GFP_KERNEL);
+	if (!bmap) {
+		rc = -ENOMEM;
+		goto err_bmap;
+	}
+
 	/*
 	 * Address needs to be page aligned. However, dma_alloc_coherent() provides
 	 * at minimal page size aligned address. No manual alignment required.
 	 */
 	addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL);
-	if (!addr)
-		return -ENOMEM;
+	if (!addr) {
+		rc = -ENOMEM;
+		goto err_alloc;
+	}
 
 	memset(addr, 0, size);
@@ -781,6 +792,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
 	evl->log = addr;
 	evl->dma = dma_addr;
 	evl->log_size = size;
+	evl->bmap = bmap;
 
 	memset(&evlcfg, 0, sizeof(evlcfg));
 	evlcfg.bits[0] = dma_addr & GENMASK(63, 12);
@@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
 	spin_unlock(&evl->lock);
 
 	return 0;
+
+err_alloc:
+	bitmap_free(bmap);
+err_bmap:
+	return rc;
 }
 static void idxd_device_evl_free(struct idxd_device *idxd)
@@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
 	iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
 	dma_free_coherent(dev, evl->log_size, evl->log, evl->dma);
+	bitmap_free(evl->bmap);
 	evl->log = NULL;
 	evl->size = IDXD_EVL_SIZE_MIN;
 	spin_unlock(&evl->lock);
......
@@ -264,6 +264,7 @@ struct idxd_driver_data {
 	struct device_type *dev_type;
 	int compl_size;
 	int align;
+	int evl_cr_off;
 };
 
 struct idxd_evl {
@@ -276,6 +277,7 @@ struct idxd_evl {
 	/* The number of entries in the event log. */
 	u16 size;
 	u16 head;
+	unsigned long *bmap;
 };
 
 struct idxd_evl_fault {
......
@@ -46,6 +46,7 @@ static struct idxd_driver_data idxd_driver_data[] = {
 		.compl_size = sizeof(struct dsa_completion_record),
 		.align = 32,
 		.dev_type = &dsa_device_type,
+		.evl_cr_off = offsetof(struct dsa_evl_entry, cr),
 	},
 	[IDXD_TYPE_IAX] = {
 		.name_prefix = "iax",
@@ -53,6 +54,7 @@ static struct idxd_driver_data idxd_driver_data[] = {
 		.compl_size = sizeof(struct iax_completion_record),
 		.align = 64,
 		.dev_type = &iax_device_type,
+		.evl_cr_off = offsetof(struct iax_evl_entry, cr),
 	},
 };
......
@@ -7,6 +7,8 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
 #include <uapi/linux/idxd.h>
 #include "../dmaengine.h"
 #include "idxd.h"
@@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work)
 	kfree(revoke);
 }
 
-static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head)
+static void idxd_evl_fault_work(struct work_struct *work)
+{
+	struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
+	struct idxd_wq *wq = fault->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	struct __evl_entry *entry_head = fault->entry;
+	void *cr = (void *)entry_head + idxd->data->evl_cr_off;
+	int cr_size = idxd->data->compl_size, copied;
+
+	switch (fault->status) {
+	case DSA_COMP_CRA_XLAT:
+	case DSA_COMP_DRAIN_EVL:
+		/*
+		 * Copy completion record to fault_addr in user address space
+		 * that is found by wq and PASID.
+		 */
+		copied = idxd_copy_cr(wq, entry_head->pasid,
+				      entry_head->fault_addr,
+				      cr, cr_size);
+		/*
+		 * The task that triggered the page fault is unknown currently
+		 * because multiple threads may share the user address
+		 * space or the task exits already before this fault.
+		 * So if the copy fails, SIGSEGV can not be sent to the task.
+		 * Just print an error for the failure. The user application
+		 * waiting for the completion record will time out on this
+		 * failure.
+		 */
+		if (copied != cr_size) {
+			dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n",
+					    cr_size, copied);
+		}
+		break;
+	default:
+		dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n",
+				    DSA_COMP_STATUS(entry_head->error));
+		break;
+	}
+
+	kmem_cache_free(idxd->evl_cache, fault);
+}
+
+static void process_evl_entry(struct idxd_device *idxd,
+			      struct __evl_entry *entry_head, unsigned int index)
 {
 	struct device *dev = &idxd->pdev->dev;
+	struct idxd_evl *evl = idxd->evl;
 	u8 status;
 
-	status = DSA_COMP_STATUS(entry_head->error);
-	dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
-			     status, entry_head->operation, entry_head->fault_addr);
+	if (test_bit(index, evl->bmap)) {
+		clear_bit(index, evl->bmap);
+	} else {
+		status = DSA_COMP_STATUS(entry_head->error);
+
+		if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) {
+			struct idxd_evl_fault *fault;
+			int ent_size = evl_ent_size(idxd);
+
+			if (entry_head->rci)
+				dev_dbg(dev, "Completion Int Req set, ignoring!\n");
+
+			if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL)
+				return;
+
+			fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
+			if (fault) {
+				struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx];
+
+				fault->wq = wq;
+				fault->status = status;
+				memcpy(&fault->entry, entry_head, ent_size);
+				INIT_WORK(&fault->work, idxd_evl_fault_work);
+				queue_work(wq->wq, &fault->work);
+			} else {
+				dev_warn(dev, "Failed to service fault work.\n");
+			}
+		} else {
+			dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
+					     status, entry_head->operation,
+					     entry_head->fault_addr);
+		}
+	}
 }
 static void process_evl_entries(struct idxd_device *idxd)
@@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd)
 	while (h != t) {
 		entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
-		process_evl_entry(idxd, entry_head);
+		process_evl_entry(idxd, entry_head, h);
 		h = (h + 1) % size;
 	}
......
@@ -135,6 +135,7 @@ enum dsa_completion_status {
 	DSA_COMP_HW_ERR1,
 	DSA_COMP_HW_ERR_DRB,
 	DSA_COMP_TRANSLATION_FAIL,
+	DSA_COMP_DRAIN_EVL = 0x26,
 };
 
 enum iax_completion_status {
......