Commit b042b278 authored by Alex Williamson's avatar Alex Williamson

Merge tag 'mlx5-vfio-v10' of...

Merge tag 'mlx5-vfio-v10' of https://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux into v5.18/vfio/next/mlx5-migration-v10

Add mlx5 live migration driver and v2 migration protocol

This series adds mlx5 live migration driver for VFs that are migration
capable and includes the v2 migration protocol definition and mlx5
implementation.

The mlx5 driver uses the vfio_pci_core split to create a specific VFIO
PCI driver that matches the mlx5 virtual functions. The driver provides
the same experience as normal vfio-pci with the addition of migration
support.

In HW the migration is controlled by the PF function, using its
mlx5_core driver, and the VFIO PCI VF driver co-ordinates with the PF to
execute the migration actions.

The bulk of the v2 migration protocol is semantically the same v1,
however it has been recast into a FSM for the device_state and the
actual syscall interface uses normal ioctl(), read() and write() instead
of building a syscall interface using the region.

Several bits of infrastructure work are included here:
 - pci_iov_vf_id() to help drivers like mlx5 figure out the VF index from
   a BDF
 - pci_iov_get_pf_drvdata() to clarify the tricky locking protocol when a
   VF reaches into its PF's driver
 - mlx5_core uses the normal SRIOV lifecycle and disables SRIOV before
   driver remove, to be compatible with pci_iov_get_pf_drvdata()
 - Lifting VFIO_DEVICE_FEATURE into core VFIO code

This series comes after alot of discussion. Some major points:
- v1 ABI compatible migration defined using the same FSM approach:
   https://lore.kernel.org/all/0-v1-a4f7cab64938+3f-vfio_mig_states_jgg@nvidia.com/
- Attempts to clarify how the v1 API works:
   Alex's:
     https://lore.kernel.org/kvm/163909282574.728533.7460416142511440919.stgit@omen/
   Jason's:
     https://lore.kernel.org/all/0-v3-184b374ad0a8+24c-vfio_mig_doc_jgg@nvidia.com/
- Etherpad exploring the scope and questions of general VFIO migration:
     https://lore.kernel.org/kvm/87mtm2loml.fsf@redhat.com/

NOTE: As this series touched mlx5_core parts we need to send this in a
pull request format to VFIO to avoid conflicts.

Matching qemu changes can be previewed here:
 https://github.com/jgunthorpe/qemu/commits/vfio_migration_v2

Link: https://lore.kernel.org/all/20220224142024.147653-1-yishaih@nvidia.comSigned-of-by: default avatarLeon Romanovsky <leonro@nvidia.com>
parents cfb92440 88faa5e8
......@@ -20320,6 +20320,12 @@ L: kvm@vger.kernel.org
S: Maintained
F: drivers/vfio/platform/
VFIO MLX5 PCI DRIVER
M: Yishai Hadas <yishaih@nvidia.com>
L: kvm@vger.kernel.org
S: Maintained
F: drivers/vfio/pci/mlx5/
VGA_SWITCHEROO
R: Lukas Wunner <lukas@wunner.de>
S: Maintained
......
......@@ -477,6 +477,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VHCA_STATE:
case MLX5_CMD_OP_MODIFY_VHCA_STATE:
case MLX5_CMD_OP_ALLOC_SF:
case MLX5_CMD_OP_SUSPEND_VHCA:
case MLX5_CMD_OP_RESUME_VHCA:
case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE:
case MLX5_CMD_OP_SAVE_VHCA_STATE:
case MLX5_CMD_OP_LOAD_VHCA_STATE:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
......@@ -674,6 +679,11 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE);
MLX5_COMMAND_STR_CASE(ALLOC_SF);
MLX5_COMMAND_STR_CASE(DEALLOC_SF);
MLX5_COMMAND_STR_CASE(SUSPEND_VHCA);
MLX5_COMMAND_STR_CASE(RESUME_VHCA);
MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE);
MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE);
MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE);
default: return "unknown command opcode";
}
}
......
......@@ -1620,6 +1620,7 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
mlx5_crdump_disable(dev);
mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
......@@ -1880,6 +1881,50 @@ static struct pci_driver mlx5_core_driver = {
.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};
/**
* mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
* mlx5_core is its driver.
* @pdev: The associated PCI device.
*
* Upon return the interface state lock stay held to let caller uses it safely.
* Caller must ensure to use the returned mlx5 device for a narrow window
* and put it back with mlx5_vf_put_core_dev() immediately once usage was over.
*
* Return: Pointer to the associated mlx5_core_dev or NULL.
*/
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
__acquires(&mdev->intf_state_mutex)
{
struct mlx5_core_dev *mdev;
mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
if (IS_ERR(mdev))
return NULL;
mutex_lock(&mdev->intf_state_mutex);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
mutex_unlock(&mdev->intf_state_mutex);
return NULL;
}
return mdev;
}
EXPORT_SYMBOL(mlx5_vf_get_core_dev);
/**
* mlx5_vf_put_core_dev - Put the mlx5 core device back.
* @mdev: The mlx5 core device.
*
* Upon return the interface state lock is unlocked and caller should not
* access the mdev any more.
*/
void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev)
__releases(&mdev->intf_state_mutex)
{
mutex_unlock(&mdev->intf_state_mutex);
}
EXPORT_SYMBOL(mlx5_vf_put_core_dev);
static void mlx5_core_verify_params(void)
{
if (prof_sel >= ARRAY_SIZE(profile)) {
......
......@@ -164,6 +164,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
void mlx5_sriov_disable(struct pci_dev *pdev);
int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
......
......@@ -161,7 +161,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
return err;
}
static void mlx5_sriov_disable(struct pci_dev *pdev)
void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int num_vfs = pci_num_vf(dev->pdev);
......@@ -205,19 +205,8 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
sriov = &dev->priv.sriov;
/* Reversed translation of PCI VF function number to the internal
* function_id, which exists in the name of virtfn symlink.
*/
for (id = 0; id < pci_num_vf(pf); id++) {
if (!sriov->vfs_ctx[id].enabled)
continue;
if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
break;
}
if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
id = pci_iov_vf_id(vf);
if (id < 0 || !sriov->vfs_ctx[id].enabled)
return -EINVAL;
return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
......
......@@ -33,6 +33,49 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
}
EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
int pci_iov_vf_id(struct pci_dev *dev)
{
struct pci_dev *pf;
if (!dev->is_virtfn)
return -EINVAL;
pf = pci_physfn(dev);
return (((dev->bus->number << 8) + dev->devfn) -
((pf->bus->number << 8) + pf->devfn + pf->sriov->offset)) /
pf->sriov->stride;
}
EXPORT_SYMBOL_GPL(pci_iov_vf_id);
/**
* pci_iov_get_pf_drvdata - Return the drvdata of a PF
* @dev - VF pci_dev
* @pf_driver - Device driver required to own the PF
*
* This must be called from a context that ensures that a VF driver is attached.
* The value returned is invalid once the VF driver completes its remove()
* callback.
*
* Locking is achieved by the driver core. A VF driver cannot be probed until
* pci_enable_sriov() is called and pci_disable_sriov() does not return until
* all VF drivers have completed their remove().
*
* The PF driver must call pci_disable_sriov() before it begins to destroy the
* drvdata.
*/
void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver)
{
struct pci_dev *pf_dev;
if (!dev->is_virtfn)
return ERR_PTR(-EINVAL);
pf_dev = dev->physfn;
if (pf_dev->driver != pf_driver)
return ERR_PTR(-EINVAL);
return pci_get_drvdata(pf_dev);
}
EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata);
/*
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
* change when NumVFs changes.
......
......@@ -43,4 +43,7 @@ config VFIO_PCI_IGD
To enable Intel IGD assignment through vfio-pci, say Y.
endif
source "drivers/vfio/pci/mlx5/Kconfig"
endif
......@@ -7,3 +7,5 @@ obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
vfio-pci-y := vfio_pci.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/
# SPDX-License-Identifier: GPL-2.0-only
config MLX5_VFIO_PCI
tristate "VFIO support for MLX5 PCI devices"
depends on MLX5_CORE
depends on VFIO_PCI_CORE
help
This provides migration support for MLX5 devices using the VFIO
framework.
If you don't know what to do here, say N.
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5-vfio-pci.o
mlx5-vfio-pci-y := main.o cmd.o
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
*/
#include "cmd.h"
int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
int ret;
if (!mdev)
return -ENOTCONN;
MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
MLX5_SET(suspend_vhca_in, in, vhca_id, vhca_id);
MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
ret = mlx5_cmd_exec_inout(mdev, suspend_vhca, in, out);
mlx5_vf_put_core_dev(mdev);
return ret;
}
int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};
int ret;
if (!mdev)
return -ENOTCONN;
MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
MLX5_SET(resume_vhca_in, in, vhca_id, vhca_id);
MLX5_SET(resume_vhca_in, in, op_mod, op_mod);
ret = mlx5_cmd_exec_inout(mdev, resume_vhca, in, out);
mlx5_vf_put_core_dev(mdev);
return ret;
}
int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
size_t *state_size)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
int ret;
if (!mdev)
return -ENOTCONN;
MLX5_SET(query_vhca_migration_state_in, in, opcode,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
MLX5_SET(query_vhca_migration_state_in, in, vhca_id, vhca_id);
MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
ret = mlx5_cmd_exec_inout(mdev, query_vhca_migration_state, in, out);
if (ret)
goto end;
*state_size = MLX5_GET(query_vhca_migration_state_out, out,
required_umem_size);
end:
mlx5_vf_put_core_dev(mdev);
return ret;
}
int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
int out_size;
void *out;
int ret;
if (!mdev)
return -ENOTCONN;
out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
out = kzalloc(out_size, GFP_KERNEL);
if (!out) {
ret = -ENOMEM;
goto end;
}
MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
MLX5_SET(query_hca_cap_in, in, other_function, 1);
MLX5_SET(query_hca_cap_in, in, function_id, function_id);
MLX5_SET(query_hca_cap_in, in, op_mod,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
HCA_CAP_OPMOD_GET_CUR);
ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
if (ret)
goto err_exec;
*vhca_id = MLX5_GET(query_hca_cap_out, out,
capability.cmd_hca_cap.vhca_id);
err_exec:
kfree(out);
end:
mlx5_vf_put_core_dev(mdev);
return ret;
}
static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
struct mlx5_vf_migration_file *migf, u32 *mkey)
{
size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
struct sg_dma_page_iter dma_iter;
int err = 0, inlen;
__be64 *mtt;
void *mkc;
u32 *in;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
sizeof(*mtt) * round_up(npages, 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(npages, 2));
mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, lw, 1);
MLX5_SET(mkc, mkc, rr, 1);
MLX5_SET(mkc, mkc, rw, 1);
MLX5_SET(mkc, mkc, pd, pdn);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
MLX5_SET64(mkc, mkc, len, migf->total_length);
err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
kvfree(in);
return err;
}
int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
struct mlx5_vf_migration_file *migf)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
u32 pdn, mkey;
int err;
if (!mdev)
return -ENOTCONN;
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
0);
if (err)
goto err_dma_map;
err = _create_state_mkey(mdev, pdn, migf, &mkey);
if (err)
goto err_create_mkey;
MLX5_SET(save_vhca_state_in, in, opcode,
MLX5_CMD_OP_SAVE_VHCA_STATE);
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
MLX5_SET(save_vhca_state_in, in, vhca_id, vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, mkey);
MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
err = mlx5_cmd_exec_inout(mdev, save_vhca_state, in, out);
if (err)
goto err_exec;
migf->total_length =
MLX5_GET(save_vhca_state_out, out, actual_image_size);
mlx5_core_destroy_mkey(mdev, mkey);
mlx5_core_dealloc_pd(mdev, pdn);
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
mlx5_vf_put_core_dev(mdev);
return 0;
err_exec:
mlx5_core_destroy_mkey(mdev, mkey);
err_create_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
err_dma_map:
mlx5_core_dealloc_pd(mdev, pdn);
end:
mlx5_vf_put_core_dev(mdev);
return err;
}
int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
struct mlx5_vf_migration_file *migf)
{
struct mlx5_core_dev *mdev = mlx5_vf_get_core_dev(pdev);
u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
u32 pdn, mkey;
int err;
if (!mdev)
return -ENOTCONN;
mutex_lock(&migf->lock);
if (!migf->total_length) {
err = -EINVAL;
goto end;
}
err = mlx5_core_alloc_pd(mdev, &pdn);
if (err)
goto end;
err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
if (err)
goto err_reg;
err = _create_state_mkey(mdev, pdn, migf, &mkey);
if (err)
goto err_mkey;
MLX5_SET(load_vhca_state_in, in, opcode,
MLX5_CMD_OP_LOAD_VHCA_STATE);
MLX5_SET(load_vhca_state_in, in, op_mod, 0);
MLX5_SET(load_vhca_state_in, in, vhca_id, vhca_id);
MLX5_SET(load_vhca_state_in, in, mkey, mkey);
MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
mlx5_core_destroy_mkey(mdev, mkey);
err_mkey:
dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
err_reg:
mlx5_core_dealloc_pd(mdev, pdn);
end:
mlx5_vf_put_core_dev(mdev);
mutex_unlock(&migf->lock);
return err;
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*/
#ifndef MLX5_VFIO_CMD_H
#define MLX5_VFIO_CMD_H
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
struct mlx5_vf_migration_file {
struct file *filp;
struct mutex lock;
bool disabled;
struct sg_append_table table;
size_t total_length;
size_t allocated_length;
/* Optimize mlx5vf_get_migration_page() for sequential access */
struct scatterlist *last_offset_sg;
unsigned int sg_last_entry;
unsigned long last_offset;
};
int mlx5vf_cmd_suspend_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
int mlx5vf_cmd_resume_vhca(struct pci_dev *pdev, u16 vhca_id, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct pci_dev *pdev, u16 vhca_id,
size_t *state_size);
int mlx5vf_cmd_get_vhca_id(struct pci_dev *pdev, u16 function_id, u16 *vhca_id);
int mlx5vf_cmd_save_vhca_state(struct pci_dev *pdev, u16 vhca_id,
struct mlx5_vf_migration_file *migf);
int mlx5vf_cmd_load_vhca_state(struct pci_dev *pdev, u16 vhca_id,
struct mlx5_vf_migration_file *migf);
#endif /* MLX5_VFIO_CMD_H */
This diff is collapsed.
......@@ -130,6 +130,7 @@ static const struct vfio_device_ops vfio_pci_ops = {
.open_device = vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
.ioctl = vfio_pci_core_ioctl,
.device_feature = vfio_pci_core_ioctl_feature,
.read = vfio_pci_core_read,
.write = vfio_pci_core_write,
.mmap = vfio_pci_core_mmap,
......
......@@ -1114,70 +1114,50 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
} else if (cmd == VFIO_DEVICE_FEATURE) {
struct vfio_device_feature feature;
uuid_t uuid;
minsz = offsetofend(struct vfio_device_feature, flags);
if (copy_from_user(&feature, (void __user *)arg, minsz))
return -EFAULT;
if (feature.argsz < minsz)
return -EINVAL;
/* Check unknown flags */
if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
VFIO_DEVICE_FEATURE_SET |
VFIO_DEVICE_FEATURE_GET |
VFIO_DEVICE_FEATURE_PROBE))
return -EINVAL;
/* GET & SET are mutually exclusive except with PROBE */
if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
(feature.flags & VFIO_DEVICE_FEATURE_SET) &&
(feature.flags & VFIO_DEVICE_FEATURE_GET))
return -EINVAL;
switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
if (!vdev->vf_token)
return -ENOTTY;
/*
* We do not support GET of the VF Token UUID as this
* could expose the token of the previous device user.
*/
if (feature.flags & VFIO_DEVICE_FEATURE_GET)
return -EINVAL;
if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
return 0;
}
return -ENOTTY;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
/* Don't SET unless told to do so */
if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
return -EINVAL;
static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
struct vfio_pci_core_device *vdev =
container_of(device, struct vfio_pci_core_device, vdev);
uuid_t uuid;
int ret;
if (feature.argsz < minsz + sizeof(uuid))
return -EINVAL;
if (!vdev->vf_token)
return -ENOTTY;
/*
* We do not support GET of the VF Token UUID as this could
* expose the token of the previous device user.
*/
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET,
sizeof(uuid));
if (ret != 1)
return ret;
if (copy_from_user(&uuid, (void __user *)(arg + minsz),
sizeof(uuid)))
return -EFAULT;
if (copy_from_user(&uuid, arg, sizeof(uuid)))
return -EFAULT;
mutex_lock(&vdev->vf_token->lock);
uuid_copy(&vdev->vf_token->uuid, &uuid);
mutex_unlock(&vdev->vf_token->lock);
mutex_lock(&vdev->vf_token->lock);
uuid_copy(&vdev->vf_token->uuid, &uuid);
mutex_unlock(&vdev->vf_token->lock);
return 0;
}
return 0;
default:
return -ENOTTY;
}
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
switch (flags & VFIO_DEVICE_FEATURE_MASK) {
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
return vfio_pci_core_feature_token(device, flags, arg, argsz);
default:
return -ENOTTY;
}
return -ENOTTY;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl_feature);
static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
......@@ -1891,8 +1871,8 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
}
EXPORT_SYMBOL_GPL(vfio_pci_core_unregister_device);
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct vfio_pci_core_device *vdev;
struct vfio_device *device;
......@@ -1914,6 +1894,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_CAN_RECOVER;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_aer_err_detected);
int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
......@@ -1936,7 +1917,7 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
EXPORT_SYMBOL_GPL(vfio_pci_core_sriov_configure);
const struct pci_error_handlers vfio_pci_core_err_handlers = {
.error_detected = vfio_pci_aer_err_detected,
.error_detected = vfio_pci_core_aer_err_detected,
};
EXPORT_SYMBOL_GPL(vfio_pci_core_err_handlers);
......
This diff is collapsed.
......@@ -1143,6 +1143,9 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev);
void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev);
#ifdef CONFIG_MLX5_CORE_IPOIB
struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
struct ib_device *ibdev,
......
......@@ -127,6 +127,11 @@ enum {
MLX5_CMD_OP_QUERY_SF_PARTITION = 0x111,
MLX5_CMD_OP_ALLOC_SF = 0x113,
MLX5_CMD_OP_DEALLOC_SF = 0x114,
MLX5_CMD_OP_SUSPEND_VHCA = 0x115,
MLX5_CMD_OP_RESUME_VHCA = 0x116,
MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE = 0x117,
MLX5_CMD_OP_SAVE_VHCA_STATE = 0x118,
MLX5_CMD_OP_LOAD_VHCA_STATE = 0x119,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
MLX5_CMD_OP_QUERY_MKEY = 0x201,
MLX5_CMD_OP_DESTROY_MKEY = 0x202,
......@@ -1757,7 +1762,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_682[0x1];
u8 log_max_sf[0x5];
u8 apu[0x1];
u8 reserved_at_689[0x7];
u8 reserved_at_689[0x4];
u8 migration[0x1];
u8 reserved_at_68e[0x2];
u8 log_min_sf_size[0x8];
u8 max_num_sf_partitions[0x8];
......@@ -11519,4 +11526,142 @@ enum {
MLX5_MTT_PERM_RW = MLX5_MTT_PERM_READ | MLX5_MTT_PERM_WRITE,
};
enum {
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR = 0x0,
MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER = 0x1,
};
struct mlx5_ifc_suspend_vhca_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x10];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
};
struct mlx5_ifc_suspend_vhca_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 reserved_at_40[0x40];
};
enum {
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_RESPONDER = 0x0,
MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR = 0x1,
};
struct mlx5_ifc_resume_vhca_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x10];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
};
struct mlx5_ifc_resume_vhca_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 reserved_at_40[0x40];
};
struct mlx5_ifc_query_vhca_migration_state_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x10];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
};
struct mlx5_ifc_query_vhca_migration_state_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 reserved_at_40[0x40];
u8 required_umem_size[0x20];
u8 reserved_at_a0[0x160];
};
struct mlx5_ifc_save_vhca_state_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x10];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
u8 va[0x40];
u8 mkey[0x20];
u8 size[0x20];
};
struct mlx5_ifc_save_vhca_state_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 actual_image_size[0x20];
u8 reserved_at_60[0x20];
};
struct mlx5_ifc_load_vhca_state_in_bits {
u8 opcode[0x10];
u8 uid[0x10];
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
u8 reserved_at_40[0x10];
u8 vhca_id[0x10];
u8 reserved_at_60[0x20];
u8 va[0x40];
u8 mkey[0x20];
u8 size[0x20];
};
struct mlx5_ifc_load_vhca_state_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
u8 syndrome[0x20];
u8 reserved_at_40[0x40];
};
#endif /* MLX5_IFC_H */
......@@ -2166,7 +2166,8 @@ void __iomem *pci_ioremap_wc_bar(struct pci_dev *pdev, int bar);
#ifdef CONFIG_PCI_IOV
int pci_iov_virtfn_bus(struct pci_dev *dev, int id);
int pci_iov_virtfn_devfn(struct pci_dev *dev, int id);
int pci_iov_vf_id(struct pci_dev *dev);
void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver);
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
void pci_disable_sriov(struct pci_dev *dev);
......@@ -2194,6 +2195,18 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id)
{
return -ENOSYS;
}
static inline int pci_iov_vf_id(struct pci_dev *dev)
{
return -ENOSYS;
}
static inline void *pci_iov_get_pf_drvdata(struct pci_dev *dev,
struct pci_driver *pf_driver)
{
return ERR_PTR(-EINVAL);
}
static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{ return -ENODEV; }
......
......@@ -33,6 +33,7 @@ struct vfio_device {
struct vfio_group *group;
struct vfio_device_set *dev_set;
struct list_head dev_set_list;
unsigned int migration_flags;
/* Members below here are private, not for driver use */
refcount_t refcount;
......@@ -55,6 +56,17 @@ struct vfio_device {
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
* @migration_set_state: Optional callback to change the migration state for
* devices that support migration. It's mandatory for
* VFIO_DEVICE_FEATURE_MIGRATION migration support.
* The returned FD is used for data transfer according to the FSM
* definition. The driver is responsible to ensure that FD reaches end
* of stream or error whenever the migration FSM leaves a data transfer
* state or before close_device() returns.
* @migration_get_state: Optional callback to get the migration state for
* devices that support migration. It's mandatory for
* VFIO_DEVICE_FEATURE_MIGRATION migration support.
*/
struct vfio_device_ops {
char *name;
......@@ -69,8 +81,44 @@ struct vfio_device_ops {
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
struct file *(*migration_set_state)(
struct vfio_device *device,
enum vfio_device_mig_state new_state);
int (*migration_get_state)(struct vfio_device *device,
enum vfio_device_mig_state *curr_state);
};
/**
* vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl
* @flags: Arg from the device_feature op
* @argsz: Arg from the device_feature op
* @supported_ops: Combination of VFIO_DEVICE_FEATURE_GET and SET the driver
* supports
* @minsz: Minimum data size the driver accepts
*
* For use in a driver's device_feature op. Checks that the inputs to the
* VFIO_DEVICE_FEATURE ioctl are correct for the driver's feature. Returns 1 if
* the driver should execute the get or set, otherwise the relevant
* value should be returned.
*/
static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops,
size_t minsz)
{
if ((flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)) &
~supported_ops)
return -EINVAL;
if (flags & VFIO_DEVICE_FEATURE_PROBE)
return 0;
/* Without PROBE one of GET or SET must be requested */
if (!(flags & (VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_SET)))
return -EINVAL;
if (argsz < minsz)
return -EINVAL;
return 1;
}
void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
const struct vfio_device_ops *ops);
void vfio_uninit_group_dev(struct vfio_device *device);
......@@ -82,6 +130,11 @@ extern void vfio_device_put(struct vfio_device *device);
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
int vfio_mig_get_next_state(struct vfio_device *device,
enum vfio_device_mig_state cur_fsm,
enum vfio_device_mig_state new_fsm,
enum vfio_device_mig_state *next_fsm);
/*
* External user API
*/
......
......@@ -220,6 +220,8 @@ int vfio_pci_core_sriov_configure(struct pci_dev *pdev, int nr_virtfn);
extern const struct pci_error_handlers vfio_pci_core_err_handlers;
long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
unsigned long arg);
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
size_t count, loff_t *ppos);
ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
......@@ -230,6 +232,8 @@ int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf);
int vfio_pci_core_enable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_disable(struct vfio_pci_core_device *vdev);
void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state);
static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
{
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment