Commit 750e2e90, authored by Joao Martins, committed by Jason Gunthorpe

iommu: Add iommu_domain ops for dirty tracking

Add to iommu domain operations a set of callbacks to perform dirty
tracking, particularly to start and stop tracking and to read and clear the
dirty data.

Drivers are generally expected to dynamically change their translation
structures to toggle the tracking and flush some form of control state
structure that stands in the IOVA translation path. Though it's not
mandatory, as drivers can also enable dirty tracking at boot, and just
clear the dirty bits before setting dirty tracking. For each of the newly
added IOMMU core APIs:

iommu_cap::IOMMU_CAP_DIRTY_TRACKING: new device iommu_capable value when
probing for capabilities of the device.

.set_dirty_tracking(): an iommu driver is expected to change its
translation structures and enable dirty tracking for the devices in the
iommu_domain. For drivers making dirty tracking always-enabled, it should
just return 0.

.read_and_clear_dirty(): an iommu driver is expected to walk the pagetables
for the iova range passed in and use iommu_dirty_bitmap_record() to record
dirty info per IOVA. When detecting that a given IOVA is dirty it should
also clear its dirty state from the PTE, *unless* the flag
IOMMU_DIRTY_NO_CLEAR is passed in -- flushing is steered from the caller of
the domain_op via iotlb_gather. The iommu core APIs use the same data
structure in use for dirty tracking for VFIO device dirty (struct
iova_bitmap) abstracted by iommu_dirty_bitmap_record() helper function.

domain::dirty_ops: IOMMU domains will store the dirty ops depending on
whether the iommu device supports dirty tracking or not. iommu drivers can
then use this field to figure out if the dirty tracking is supported+enforced
on attach. The enforcement is enabled via domain_alloc_user() which is done
via IOMMUFD hwpt flag introduced later.

Link: https://lore.kernel.org/r/20231024135109.73787-5-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 13578d4e
......@@ -166,6 +166,10 @@ struct io_pgtable_ops {
struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
unsigned long iova, size_t size,
unsigned long flags,
struct iommu_dirty_bitmap *dirty);
};
/**
......
......@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/iova_bitmap.h>
#include <uapi/linux/iommu.h>
#define IOMMU_READ (1 << 0)
......@@ -37,6 +38,7 @@ struct bus_type;
struct device;
struct iommu_domain;
struct iommu_domain_ops;
struct iommu_dirty_ops;
struct notifier_block;
struct iommu_sva;
struct iommu_fault_event;
......@@ -95,6 +97,8 @@ struct iommu_domain_geometry {
struct iommu_domain {
unsigned type;
const struct iommu_domain_ops *ops;
const struct iommu_dirty_ops *dirty_ops;
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
struct iommu_domain_geometry geometry;
struct iommu_dma_cookie *iova_cookie;
......@@ -133,6 +137,7 @@ enum iommu_cap {
* usefully support the non-strict DMA flush queue.
*/
IOMMU_CAP_DEFERRED_FLUSH,
IOMMU_CAP_DIRTY_TRACKING, /* IOMMU supports dirty tracking */
};
/* These are the possible reserved region types */
......@@ -227,6 +232,35 @@ struct iommu_iotlb_gather {
bool queued;
};
/**
 * struct iommu_dirty_bitmap - Dirty IOVA bitmap state
 * @bitmap: IOVA bitmap that dirty ranges are recorded into. May be NULL, in
 *          which case iommu_dirty_bitmap_record() skips the bitmap update.
 * @gather: Range information for a pending IOTLB flush. May be NULL, in
 *          which case iommu_dirty_bitmap_record() accumulates no flush range.
 *
 * Populated by iommu_dirty_bitmap_init() and consumed by
 * iommu_dirty_bitmap_record().
 */
struct iommu_dirty_bitmap {
struct iova_bitmap *bitmap;
struct iommu_iotlb_gather *gather;
};
/*
 * Flag for the @flags argument of read_and_clear_dirty():
 * read but do not clear any dirty bits.
 */
#define IOMMU_DIRTY_NO_CLEAR (1 << 0)
/**
 * struct iommu_dirty_ops - domain specific dirty tracking operations
 * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain
 * @read_and_clear_dirty: Walk IOMMU page tables for dirtied PTEs marshalled
 *                        into a bitmap, with a bit represented as a page.
 *                        Reads the dirty PTE bits for the range given by
 *                        @iova and @size and clears them from the IO
 *                        pagetables, unless IOMMU_DIRTY_NO_CLEAR is set in
 *                        @flags. Dirty state is reported through @dirty.
 */
struct iommu_dirty_ops {
int (*set_dirty_tracking)(struct iommu_domain *domain, bool enabled);
int (*read_and_clear_dirty)(struct iommu_domain *domain,
unsigned long iova, size_t size,
unsigned long flags,
struct iommu_dirty_bitmap *dirty);
};
/**
* struct iommu_ops - iommu ops and capabilities
* @capable: check capability
......@@ -641,6 +675,28 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
return gather && gather->queued;
}
static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty,
struct iova_bitmap *bitmap,
struct iommu_iotlb_gather *gather)
{
if (gather)
iommu_iotlb_gather_init(gather);
dirty->bitmap = bitmap;
dirty->gather = gather;
}
/*
 * Report the range starting at @iova and spanning @length as dirty.
 *
 * The range is set in the IOVA bitmap when one is attached, and then added
 * to the IOTLB gather when one is attached. Either may be absent (NULL).
 */
static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty,
					     unsigned long iova,
					     unsigned long length)
{
	struct iova_bitmap *bits = dirty->bitmap;

	if (bits)
		iova_bitmap_set(bits, iova, length);

	/* Nothing to queue for flushing without a gather structure. */
	if (!dirty->gather)
		return;

	iommu_iotlb_gather_add_range(dirty->gather, iova, length);
}
/* PCI device grouping function */
extern struct iommu_group *pci_device_group(struct device *dev);
/* Generic device grouping function */
......@@ -746,6 +802,8 @@ struct iommu_fwspec {};
/*
 * Empty placeholder definitions so that code naming these types still
 * compiles alongside the no-op helpers defined after them.
 * NOTE(review): presumably the !CONFIG_IOMMU_API section of the header;
 * the enclosing #ifdef is not visible here, so confirm.
 */
struct iommu_device {};
struct iommu_fault_param {};
struct iommu_iotlb_gather {};
struct iommu_dirty_bitmap {};
struct iommu_dirty_ops {};
static inline bool iommu_present(const struct bus_type *bus)
{
......@@ -978,6 +1036,18 @@ static inline bool iommu_iotlb_gather_queued(struct iommu_iotlb_gather *gather)
return false;
}
/*
 * No-op variant of iommu_dirty_bitmap_init(): @dirty, @bitmap and @gather
 * are all ignored and nothing is initialised.
 * NOTE(review): presumably the build without IOMMU API support; confirm
 * against the enclosing #ifdef, which is not visible here.
 */
static inline void iommu_dirty_bitmap_init(struct iommu_dirty_bitmap *dirty,
struct iova_bitmap *bitmap,
struct iommu_iotlb_gather *gather)
{
}
/*
 * No-op variant of iommu_dirty_bitmap_record(): the range given by @iova
 * and @length is neither set in a bitmap nor added to an IOTLB gather.
 * NOTE(review): presumably the build without IOMMU API support; confirm
 * against the enclosing #ifdef, which is not visible here.
 */
static inline void iommu_dirty_bitmap_record(struct iommu_dirty_bitmap *dirty,
unsigned long iova,
unsigned long length)
{
}
/*
 * No-op variant of iommu_device_unregister(): @iommu is ignored.
 * NOTE(review): presumably the build without IOMMU API support; confirm
 * against the enclosing #ifdef, which is not visible here.
 */
static inline void iommu_device_unregister(struct iommu_device *iommu)
{
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment