Commit b9a60d6f authored by Joao Martins, committed by Jason Gunthorpe

iommufd: Add IOMMU_HWPT_GET_DIRTY_BITMAP

Connect a hw_pagetable to the IOMMU core dirty tracking
read_and_clear_dirty iommu domain op. It exposes all of the functionality
for the UAPI that reads the dirtied IOVAs while clearing the Dirty bits from
the PTEs.

In doing so, add an IO pagetable API iopt_read_and_clear_dirty_data() that
reads the dirty IOPTEs for a given IOVA range and then copies them back to
the userspace bitmap.

Underneath it uses the IOMMU domain kernel API which will read the dirty
bits, as well as atomically clearing the IOPTE dirty bit and flushing the
IOTLB at the end. The IOVA bitmap usage takes care of iterating over the
bitmap's user pages efficiently and without copies. Within the iterator
function we iterate over io-pagetable contiguous areas that have been
mapped.

Contrary to past incarnations of a similar interface in VFIO, the IOVA range
to be scanned is tied to the bitmap size, thus the application needs to
pass an appropriately sized bitmap address taking into account the iova
range being passed *and* page size ... as opposed to allowing bitmap-iova
!= iova.

Link: https://lore.kernel.org/r/20231024135109.73787-8-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent e2a4b294
...@@ -220,3 +220,25 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) ...@@ -220,3 +220,25 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
iommufd_put_object(&hwpt->obj); iommufd_put_object(&hwpt->obj);
return rc; return rc;
} }
/*
 * IOMMU_HWPT_GET_DIRTY_BITMAP ioctl handler.
 *
 * Rejects a request with any flag or reserved bit set, looks up the HW
 * pagetable named by cmd->hwpt_id and forwards the request to
 * iopt_read_and_clear_dirty_data() on the io_pagetable of the IOAS that the
 * HW pagetable points at.
 *
 * Returns -EOPNOTSUPP when flags/__reserved are non-zero, the error encoded
 * in the pointer returned by iommufd_get_hwpt() when the lookup fails, and
 * otherwise the result of iopt_read_and_clear_dirty_data().
 */
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
{
	struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd;
	struct iommufd_hw_pagetable *hwpt;
	int rc;

	/* Neither field has a defined non-zero meaning here. */
	if (cmd->flags != 0 || cmd->__reserved != 0)
		return -EOPNOTSUPP;

	hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
	if (IS_ERR(hwpt))
		return PTR_ERR(hwpt);

	rc = iopt_read_and_clear_dirty_data(&hwpt->ioas->iopt, hwpt->domain,
					    cmd->flags, cmd);

	/* Pairs with the iommufd_get_hwpt() lookup above. */
	iommufd_put_object(&hwpt->obj);
	return rc;
}
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/err.h> #include <linux/err.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <uapi/linux/iommufd.h>
#include "io_pagetable.h" #include "io_pagetable.h"
#include "double_span.h" #include "double_span.h"
...@@ -412,6 +413,118 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, ...@@ -412,6 +413,118 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
return 0; return 0;
} }
/*
 * Context passed as the opaque pointer to the iova_bitmap iteration callback
 * (__iommu_read_and_clear_dirty).
 */
struct iova_bitmap_fn_arg {
/* IO pagetable whose contiguous areas are walked for the requested range */
struct io_pagetable *iopt;
/* domain whose dirty_ops->read_and_clear_dirty op is invoked */
struct iommu_domain *domain;
/* dirty bitmap state handed through to read_and_clear_dirty */
struct iommu_dirty_bitmap *dirty;
};
/*
 * Callback given to iova_bitmap_for_each().
 *
 * Walks the contiguous areas of the io_pagetable in @opaque (a
 * struct iova_bitmap_fn_arg) over [iova, iova + length - 1] and invokes the
 * domain's read_and_clear_dirty op on the portion of each area that falls
 * inside that range.
 *
 * Returns the first non-zero value produced by read_and_clear_dirty,
 * -EINVAL when iopt_area_contig_done() reports that the walk did not
 * complete, and 0 otherwise.
 */
static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iova_bitmap_fn_arg *fn_arg = opaque;
	struct iommu_domain *domain = fn_arg->domain;
	const struct iommu_dirty_ops *dirty_ops = domain->dirty_ops;
	unsigned long range_last = iova + length - 1;
	struct iopt_area_contig_iter iter;
	struct iopt_area *area;

	iopt_for_each_contig_area(&iter, area, fn_arg->iopt, iova, range_last) {
		/* Never read past the requested range, even if the area does. */
		unsigned long chunk_last =
			min(range_last, iopt_area_last_iova(area));
		int rc;

		rc = dirty_ops->read_and_clear_dirty(domain, iter.cur_iova,
						     chunk_last - iter.cur_iova + 1,
						     0, fn_arg->dirty);
		if (rc)
			return rc;
	}

	/* Presumably a gap in the mapped areas stopped the walk early. */
	if (!iopt_area_contig_done(&iter))
		return -EINVAL;

	return 0;
}
/*
 * Read and clear the dirty state of @domain for the IOVA range described by
 * @bitmap, recording the result in the user bitmap at bitmap->data.
 *
 * @domain: IOMMU domain; must provide dirty_ops->read_and_clear_dirty
 * @iopt:   IO pagetable whose mapped areas are walked by the callback
 * @flags:  currently unused by this function
 * @bitmap: user request carrying iova, length, page_size and data pointer
 *
 * Returns -EOPNOTSUPP when the domain has no read_and_clear_dirty op,
 * -ENOMEM when the iova_bitmap cannot be set up, and otherwise the result
 * of the bitmap iteration (0 on success, or the first error returned by
 * __iommu_read_and_clear_dirty()).
 */
static int
iommu_read_and_clear_dirty(struct iommu_domain *domain,
			   struct io_pagetable *iopt, unsigned long flags,
			   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	struct iova_bitmap_fn_arg arg;
	struct iova_bitmap *iter;
	int ret;

	if (!ops || !ops->read_and_clear_dirty)
		return -EOPNOTSUPP;

	iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
				 bitmap->page_size,
				 u64_to_user_ptr(bitmap->data));
	/*
	 * NOTE(review): every allocation failure is reported as -ENOMEM;
	 * PTR_ERR(iter) may carry a more specific code - confirm before
	 * changing the errno seen by userspace.
	 */
	if (IS_ERR(iter))
		return -ENOMEM;

	iommu_dirty_bitmap_init(&dirty, iter, &gather);

	arg.iopt = iopt;
	arg.domain = domain;
	arg.dirty = &dirty;

	/*
	 * Keep the iteration result: dropping it would report success to
	 * userspace even when the callback failed part-way through.
	 */
	ret = iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);

	/*
	 * Sync and free unconditionally: dirty bits handled before a failure
	 * may already have been gathered for invalidation.
	 */
	iommu_iotlb_sync(domain, &gather);
	iova_bitmap_free(iter);

	return ret;
}
/*
 * Validate the IOVA range of a IOMMU_HWPT_GET_DIRTY_BITMAP request.
 *
 * @iopt:   IO pagetable supplying the IOMMU alignment (iova_alignment)
 * @bitmap: user request; iova, length and page_size are checked
 *
 * Returns:
 *   -EINVAL    length or page_size is zero, or iova / iova + length is not
 *              aligned to both iopt->iova_alignment and page_size
 *   -EOVERFLOW iova + length - 1 wraps, or the range does not fit in an
 *              unsigned long
 *   0          the range is acceptable
 */
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	size_t iommu_pgsize = iopt->iova_alignment;
	u64 last_iova;

	/*
	 * An empty range has no last IOVA: "length - 1" would wrap to the
	 * maximum u64 and, with iova == 0, slip through every check below as
	 * if the request covered the whole address space.
	 */
	if (!bitmap->length)
		return -EINVAL;

	if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
		return -EOVERFLOW;

	/* The IOVA walkers use unsigned long; reject what they cannot hold. */
	if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
		return -EOVERFLOW;

	/* Both ends of the range must sit on an IOMMU alignment boundary. */
	if ((bitmap->iova & (iommu_pgsize - 1)) ||
	    ((last_iova + 1) & (iommu_pgsize - 1)))
		return -EINVAL;

	/* Checked before page_size - 1 is used as a mask below. */
	if (!bitmap->page_size)
		return -EINVAL;

	/* ... and on a boundary of the per-bit granularity of the bitmap. */
	if ((bitmap->iova & (bitmap->page_size - 1)) ||
	    ((last_iova + 1) & (bitmap->page_size - 1)))
		return -EINVAL;

	return 0;
}
/*
 * Validate a dirty-bitmap request against @iopt and, if it is acceptable,
 * read and clear the dirty state of @domain while holding
 * iopt->iova_rwsem for read.
 *
 * Returns the error from iommufd_check_iova_range() when validation fails,
 * otherwise the result of iommu_read_and_clear_dirty().
 */
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap)
{
	int rc;

	rc = iommufd_check_iova_range(iopt, bitmap);
	if (!rc) {
		/* The rwsem is held for read across the whole area walk. */
		down_read(&iopt->iova_rwsem);
		rc = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
		up_read(&iopt->iova_rwsem);
	}

	return rc;
}
static int iopt_clear_dirty_data(struct io_pagetable *iopt, static int iopt_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain) struct iommu_domain *domain)
{ {
......
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
#include <linux/xarray.h> #include <linux/xarray.h>
#include <linux/refcount.h> #include <linux/refcount.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/iommu.h>
#include <linux/iova_bitmap.h>
#include <uapi/linux/iommufd.h> #include <uapi/linux/iommufd.h>
struct iommu_domain; struct iommu_domain;
...@@ -71,6 +73,10 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, ...@@ -71,6 +73,10 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length, unsigned long *unmapped); unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
struct iommu_domain *domain,
unsigned long flags,
struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt, int iopt_set_dirty_tracking(struct io_pagetable *iopt,
struct iommu_domain *domain, bool enable); struct iommu_domain *domain, bool enable);
...@@ -226,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd, ...@@ -226,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
struct iommufd_ctx *ictx); struct iommufd_ctx *ictx);
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
struct iommu_hwpt_get_dirty_bitmap *bitmap);
/* /*
* A HW pagetable is called an iommu_domain inside the kernel. This user object * A HW pagetable is called an iommu_domain inside the kernel. This user object
...@@ -252,6 +260,8 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) ...@@ -252,6 +260,8 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_hw_pagetable, obj); struct iommufd_hw_pagetable, obj);
} }
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hw_pagetable * struct iommufd_hw_pagetable *
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
struct iommufd_device *idev, u32 flags, struct iommufd_device *idev, u32 flags,
......
...@@ -307,6 +307,7 @@ union ucmd_buffer { ...@@ -307,6 +307,7 @@ union ucmd_buffer {
struct iommu_destroy destroy; struct iommu_destroy destroy;
struct iommu_hw_info info; struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_hwpt_set_dirty_tracking set_dirty_tracking;
struct iommu_ioas_alloc alloc; struct iommu_ioas_alloc alloc;
struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_allow_iovas allow_iovas;
...@@ -343,6 +344,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { ...@@ -343,6 +344,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
__reserved), __reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
__reserved), __reserved),
IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
struct iommu_hwpt_get_dirty_bitmap, data),
IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking,
struct iommu_hwpt_set_dirty_tracking, __reserved), struct iommu_hwpt_set_dirty_tracking, __reserved),
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
...@@ -555,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR); ...@@ -555,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio"); MODULE_ALIAS("devname:vfio/vfio");
#endif #endif
MODULE_IMPORT_NS(IOMMUFD_INTERNAL); MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
MODULE_IMPORT_NS(IOMMUFD);
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -48,6 +48,7 @@ enum { ...@@ -48,6 +48,7 @@ enum {
IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_HWPT_ALLOC,
IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_GET_HW_INFO,
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
}; };
/** /**
...@@ -481,4 +482,38 @@ struct iommu_hwpt_set_dirty_tracking { ...@@ -481,4 +482,38 @@ struct iommu_hwpt_set_dirty_tracking {
}; };
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Must be zero
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size; @iova + @length must not overflow and must be
 *          aligned to both the IOMMU alignment and @page_size, as must @iova
 * @page_size: page size granularity of each bit in the bitmap; must be
 *             non-zero
 * @data: user address of the bitmap where the dirty bits are set. Each bit
 *        stands for one @page_size unit of IOVA space, and the first bit
 *        corresponds to @iova.
 *
 * Checking a given IOVA is dirty:
 *
 *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
__u32 size;
__u32 hwpt_id;
__u32 flags;
__u32 __reserved;
__aligned_u64 iova;
__aligned_u64 length;
__aligned_u64 page_size;
__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment