Commit 0181f8c8 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "Several new features here:

   - virtio-balloon supports new stats

   - vdpa supports setting mac address

   - vdpa/mlx5 suspend/resume as well as MKEY ops are now faster

   - virtio_fs supports new sysfs entries for queue info

   - virtio/vsock performance has been improved

  And fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (34 commits)
  vsock/virtio: avoid queuing packets when intermediate queue is empty
  vsock/virtio: refactor virtio_transport_send_pkt_work
  fw_cfg: Constify struct kobj_type
  vdpa/mlx5: Postpone MR deletion
  vdpa/mlx5: Introduce init/destroy for MR resources
  vdpa/mlx5: Rename mr_mtx -> lock
  vdpa/mlx5: Extract mr members in own resource struct
  vdpa/mlx5: Rename function
  vdpa/mlx5: Delete direct MKEYs in parallel
  vdpa/mlx5: Create direct MKEYs in parallel
  MAINTAINERS: add virtio-vsock driver in the VIRTIO CORE section
  virtio_fs: add sysfs entries for queue information
  virtio_fs: introduce virtio_fs_put_locked helper
  vdpa: Remove unused declarations
  vdpa/mlx5: Parallelize VQ suspend/resume for CVQ MQ command
  vdpa/mlx5: Small improvement for change_num_qps()
  vdpa/mlx5: Keep notifiers during suspend but ignore
  vdpa/mlx5: Parallelize device resume
  vdpa/mlx5: Parallelize device suspend
  vdpa/mlx5: Use async API for vq modify commands
  ...
parents 11a299a7 efcd71af
@@ -24464,6 +24464,7 @@ F:	include/linux/vdpa.h
 F:	include/linux/virtio*.h
 F:	include/linux/vringh.h
 F:	include/uapi/linux/virtio_*.h
+F:	net/vmw_vsock/virtio*
 F:	tools/virtio/
 F:	tools/testing/selftests/drivers/net/virtio_net/
......
@@ -452,7 +452,7 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj)
 }
 
 /* kobj_type: ties together all properties required to register an entry */
-static struct kobj_type fw_cfg_sysfs_entry_ktype = {
+static const struct kobj_type fw_cfg_sysfs_entry_ktype = {
 	.default_groups = fw_cfg_sysfs_entry_groups,
 	.sysfs_ops = &fw_cfg_sysfs_attr_ops,
 	.release = fw_cfg_sysfs_release_entry,
......
@@ -1887,10 +1887,12 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	throttle_op = mlx5_cmd_is_throttle_opcode(opcode);
 	if (throttle_op) {
-		/* atomic context may not sleep */
-		if (callback)
-			return -EINVAL;
-		down(&dev->cmd.vars.throttle_sem);
+		if (callback) {
+			if (down_trylock(&dev->cmd.vars.throttle_sem))
+				return -EBUSY;
+		} else {
+			down(&dev->cmd.vars.throttle_sem);
+		}
 	}
 
 	pages_queue = is_manage_pages(in);
@@ -2096,10 +2098,19 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
 {
 	struct mlx5_async_work *work = _work;
 	struct mlx5_async_ctx *ctx;
+	struct mlx5_core_dev *dev;
+	u16 opcode;
 
 	ctx = work->ctx;
-	status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
+	dev = ctx->dev;
+	opcode = work->opcode;
+	status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out);
 	work->user_callback(status, work);
+	/* Can't access "work" from this point on. It could have been freed in
+	 * the callback.
+	 */
+	if (mlx5_cmd_is_throttle_opcode(opcode))
+		up(&dev->cmd.vars.throttle_sem);
 	if (atomic_dec_and_test(&ctx->num_inflight))
 		complete(&ctx->inflight_done);
 }
......
@@ -44,6 +44,15 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
 	unsigned long flags;
 	int err, err1;
 
+	/*
+	 * Don't bother to submit the request to the device if the device is
+	 * not activated.
+	 */
+	if (vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_NEEDS_RESET) {
+		dev_info(&vdev->dev, "virtio pmem device needs a reset\n");
+		return -EIO;
+	}
+
 	might_sleep();
 	req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
 	if (!req_data)
......
@@ -112,15 +112,12 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset,
 			    const void *src, int length);
 u8 ifcvf_get_status(struct ifcvf_hw *hw);
 void ifcvf_set_status(struct ifcvf_hw *hw, u8 status);
-void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
 void ifcvf_reset(struct ifcvf_hw *hw);
 u64 ifcvf_get_dev_features(struct ifcvf_hw *hw);
 u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
 int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
 u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
 int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
-struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
-int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
 u32 ifcvf_get_config_size(struct ifcvf_hw *hw);
 u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector);
 u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector);
......
@@ -83,10 +83,28 @@ enum {
 	MLX5_VDPA_NUM_AS = 2
 };
 
+struct mlx5_vdpa_mr_resources {
+	struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
+	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+
+	/* Pre-deletion mr list */
+	struct list_head mr_list_head;
+
+	/* Deferred mr list */
+	struct list_head mr_gc_list_head;
+	struct workqueue_struct *wq_gc;
+	struct delayed_work gc_dwork_ent;
+
+	struct mutex lock;
+
+	atomic_t shutdown;
+};
+
 struct mlx5_vdpa_dev {
 	struct vdpa_device vdev;
 	struct mlx5_core_dev *mdev;
 	struct mlx5_vdpa_resources res;
+	struct mlx5_vdpa_mr_resources mres;
 
 	u64 mlx_features;
 	u64 actual_features;
@@ -95,14 +113,23 @@ struct mlx5_vdpa_dev {
 	u16 max_idx;
 	u32 generation;
 
-	struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
-	struct list_head mr_list_head;
-	/* serialize mr access */
-	struct mutex mr_mtx;
 	struct mlx5_control_vq cvq;
 	struct workqueue_struct *wq;
-	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
 	bool suspended;
+
+	struct mlx5_async_ctx async_ctx;
+};
+
+struct mlx5_vdpa_async_cmd {
+	int err;
+	struct mlx5_async_work cb_work;
+	struct completion cmd_done;
+	void *in;
+	size_t inlen;
+	void *out;
+	size_t outlen;
 };
 
 int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
@@ -121,7 +148,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
 int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
 struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
 					 struct vhost_iotlb *iotlb);
+int mlx5_vdpa_init_mr_resources(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_clean_mrs(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
 		      struct mlx5_vdpa_mr *mr);
 void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
@@ -134,6 +163,14 @@ int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
 			       unsigned int asid);
 int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev);
 int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+			      struct mlx5_vdpa_async_cmd *cmds,
+			      int num_cmds);
 
+#define mlx5_vdpa_err(__dev, format, ...) \
+	dev_err((__dev)->mdev->device, "%s:%d:(pid %d) error: " format, __func__, __LINE__, \
+		current->pid, ##__VA_ARGS__)
+
 #define mlx5_vdpa_warn(__dev, format, ...) \
 	dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
......
@@ -256,7 +256,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
 		mlx5_vdpa_warn(mvdev, "resources already allocated\n");
 		return -EINVAL;
 	}
-	mutex_init(&mvdev->mr_mtx);
 	res->uar = mlx5_get_uars_page(mdev);
 	if (IS_ERR(res->uar)) {
 		err = PTR_ERR(res->uar);
@@ -301,7 +300,6 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
 err_uctx:
 	mlx5_put_uars_page(mdev, res->uar);
 err_uars:
-	mutex_destroy(&mvdev->mr_mtx);
 	return err;
 }
 
@@ -318,6 +316,78 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
 	dealloc_pd(mvdev, res->pdn, res->uid);
 	destroy_uctx(mvdev, res->uid);
 	mlx5_put_uars_page(mvdev->mdev, res->uar);
-	mutex_destroy(&mvdev->mr_mtx);
 	res->valid = false;
 }
 
+static void virtqueue_cmd_callback(int status, struct mlx5_async_work *context)
+{
+	struct mlx5_vdpa_async_cmd *cmd =
+		container_of(context, struct mlx5_vdpa_async_cmd, cb_work);
+
+	cmd->err = mlx5_cmd_check(context->ctx->dev, status, cmd->in, cmd->out);
+	complete(&cmd->cmd_done);
+}
+
+static int issue_async_cmd(struct mlx5_vdpa_dev *mvdev,
+			   struct mlx5_vdpa_async_cmd *cmds,
+			   int issued,
+			   int *completed)
+{
+	struct mlx5_vdpa_async_cmd *cmd = &cmds[issued];
+	int err;
+
+retry:
+	err = mlx5_cmd_exec_cb(&mvdev->async_ctx,
+			       cmd->in, cmd->inlen,
+			       cmd->out, cmd->outlen,
+			       virtqueue_cmd_callback,
+			       &cmd->cb_work);
+	if (err == -EBUSY) {
+		if (*completed < issued) {
+			/* Throttled by own commands: wait for oldest completion. */
+			wait_for_completion(&cmds[*completed].cmd_done);
+			(*completed)++;
+
+			goto retry;
+		} else {
+			/* Throttled by external commands: switch to sync api. */
+			err = mlx5_cmd_exec(mvdev->mdev,
+					    cmd->in, cmd->inlen,
+					    cmd->out, cmd->outlen);
+			if (!err)
+				(*completed)++;
+		}
+	}
+
+	return err;
+}
+
+int mlx5_vdpa_exec_async_cmds(struct mlx5_vdpa_dev *mvdev,
+			      struct mlx5_vdpa_async_cmd *cmds,
+			      int num_cmds)
+{
+	int completed = 0;
+	int issued = 0;
+	int err = 0;
+
+	for (int i = 0; i < num_cmds; i++)
+		init_completion(&cmds[i].cmd_done);
+
+	while (issued < num_cmds) {
+		err = issue_async_cmd(mvdev, cmds, issued, &completed);
+		if (err) {
+			mlx5_vdpa_err(mvdev, "error issuing command %d of %d: %d\n",
+				      issued, num_cmds, err);
+			break;
+		}
+
+		issued++;
+	}
+
+	while (completed < issued)
+		wait_for_completion(&cmds[completed++].cmd_done);
+
+	return err;
+}
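For orientation, here is a rough sketch of how a caller might batch work through the new mlx5_vdpa_exec_async_cmds() helper. It is not code from this series: the real users are the parallel MKEY create/delete and VQ suspend/resume patches listed above, and the buffer sizes and payload filling are device-specific placeholders here.

/* Illustrative only; not from this series. Buffer sizes and payload layout
 * are device-specific and passed in as plain parameters for the sketch.
 */
static int issue_batch(struct mlx5_vdpa_dev *mvdev, int num_cmds,
		       size_t in_sz, size_t out_sz)
{
	struct mlx5_vdpa_async_cmd *cmds;
	int err, i;

	cmds = kvcalloc(num_cmds, sizeof(*cmds), GFP_KERNEL);
	if (!cmds)
		return -ENOMEM;

	for (i = 0; i < num_cmds; i++) {
		cmds[i].in = kvzalloc(in_sz, GFP_KERNEL);
		cmds[i].out = kvzalloc(out_sz, GFP_KERNEL);
		cmds[i].inlen = in_sz;
		cmds[i].outlen = out_sz;
		if (!cmds[i].in || !cmds[i].out) {
			err = -ENOMEM;
			goto out;
		}
		/* device-specific: fill cmds[i].in with the i-th command payload */
	}

	/* Issues everything through mvdev->async_ctx, falling back to the
	 * sync API when throttled, and waits for all completions.
	 */
	err = mlx5_vdpa_exec_async_cmds(mvdev, cmds, num_cmds);

	for (i = 0; i < num_cmds; i++) {
		if (cmds[i].err) {
			mlx5_vdpa_err(mvdev, "cmd %d failed: %d\n", i, cmds[i].err);
			err = err ?: cmds[i].err;
		}
	}
out:
	for (i = 0; i < num_cmds; i++) {
		kvfree(cmds[i].in);
		kvfree(cmds[i].out);
	}
	kvfree(cmds);
	return err;
}

Each entry's err field carries the per-command status translated by mlx5_cmd_check() in the completion callback, so a caller checks both the helper's return value and the individual entries.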
@@ -14,5 +14,4 @@ int pds_vdpa_cmd_init_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
 			 struct pds_vdpa_vq_info *vq_info);
 int pds_vdpa_cmd_reset_vq(struct pds_vdpa_device *pdsv, u16 qid, u16 invert_idx,
 			  struct pds_vdpa_vq_info *vq_info);
-int pds_vdpa_cmd_set_features(struct pds_vdpa_device *pdsv, u64 features);
 
 #endif /* _VDPA_CMDS_H_ */
@@ -1361,6 +1361,80 @@ static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info
 	return err;
 }
 
+static int vdpa_dev_net_device_attr_set(struct vdpa_device *vdev,
+					struct genl_info *info)
+{
+	struct vdpa_dev_set_config set_config = {};
+	struct vdpa_mgmt_dev *mdev = vdev->mdev;
+	struct nlattr **nl_attrs = info->attrs;
+	const u8 *macaddr;
+	int err = -EOPNOTSUPP;
+
+	down_write(&vdev->cf_lock);
+	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
+		set_config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
+		macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
+
+		if (is_valid_ether_addr(macaddr)) {
+			ether_addr_copy(set_config.net.mac, macaddr);
+			if (mdev->ops->dev_set_attr) {
+				err = mdev->ops->dev_set_attr(mdev, vdev,
+							      &set_config);
+			} else {
+				NL_SET_ERR_MSG_FMT_MOD(info->extack,
+						       "Operation not supported by the device.");
+			}
+		} else {
+			NL_SET_ERR_MSG_FMT_MOD(info->extack,
+					       "Invalid MAC address");
+		}
+	}
+	up_write(&vdev->cf_lock);
+	return err;
+}
+
+static int vdpa_nl_cmd_dev_attr_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct vdpa_device *vdev;
+	struct device *dev;
+	const char *name;
+	u64 classes;
+	int err = 0;
+
+	if (!info->attrs[VDPA_ATTR_DEV_NAME])
+		return -EINVAL;
+
+	name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+
+	down_write(&vdpa_dev_lock);
+	dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match);
+	if (!dev) {
+		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
+		err = -ENODEV;
+		goto dev_err;
+	}
+	vdev = container_of(dev, struct vdpa_device, dev);
+	if (!vdev->mdev) {
+		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
+		err = -EINVAL;
+		goto mdev_err;
+	}
+	classes = vdpa_mgmtdev_get_classes(vdev->mdev, NULL);
+	if (classes & BIT_ULL(VIRTIO_ID_NET)) {
+		err = vdpa_dev_net_device_attr_set(vdev, info);
+	} else {
+		NL_SET_ERR_MSG_FMT_MOD(info->extack, "%s device not supported",
+				       name);
+	}
+
+mdev_err:
+	put_device(dev);
+dev_err:
+	up_write(&vdpa_dev_lock);
+	return err;
+}
+
 static int vdpa_dev_config_dump(struct device *dev, void *data)
 {
 	struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
@@ -1497,6 +1571,11 @@ static const struct genl_ops vdpa_nl_ops[] = {
 		.doit = vdpa_nl_cmd_dev_stats_get_doit,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = VDPA_CMD_DEV_ATTR_SET,
+		.doit = vdpa_nl_cmd_dev_attr_set_doit,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_family vdpa_nl_family __ro_after_init = {
......
@@ -414,6 +414,24 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
 	net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
 }
 
+static int vdpasim_net_set_attr(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev,
+				const struct vdpa_dev_set_config *config)
+{
+	struct vdpasim *vdpasim = container_of(dev, struct vdpasim, vdpa);
+	struct virtio_net_config *vio_config = vdpasim->config;
+
+	mutex_lock(&vdpasim->mutex);
+
+	if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
+		ether_addr_copy(vio_config->mac, config->net.mac);
+		mutex_unlock(&vdpasim->mutex);
+		return 0;
+	}
+
+	mutex_unlock(&vdpasim->mutex);
+	return -EOPNOTSUPP;
+}
+
 static void vdpasim_net_setup_config(struct vdpasim *vdpasim,
 				     const struct vdpa_dev_set_config *config)
 {
@@ -510,7 +528,8 @@ static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
 static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
 	.dev_add = vdpasim_net_dev_add,
-	.dev_del = vdpasim_net_dev_del
+	.dev_del = vdpasim_net_dev_del,
+	.dev_set_attr = vdpasim_net_set_attr
 };
 
 static struct virtio_device_id id_table[] = {
......
@@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 	if (irq < 0)
 		return;
 
-	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 	if (!vq->call_ctx.ctx)
 		return;
 
-	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 	vq->call_ctx.producer.irq = irq;
 	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 	if (unlikely(ret))
@@ -709,6 +707,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 			vq->last_avail_idx = vq_state.split.avail_index;
 		}
 		break;
+	case VHOST_SET_VRING_CALL:
+		if (vq->call_ctx.ctx) {
+			if (ops->get_status(vdpa) &
+			    VIRTIO_CONFIG_S_DRIVER_OK)
+				vhost_vdpa_unsetup_vq_irq(v, idx);
+			vq->call_ctx.producer.token = NULL;
+		}
+		break;
 	}
 
 	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
@@ -747,13 +753,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 			cb.callback = vhost_vdpa_virtqueue_cb;
 			cb.private = vq;
 			cb.trigger = vq->call_ctx.ctx;
+			vq->call_ctx.producer.token = vq->call_ctx.ctx;
+			if (ops->get_status(vdpa) &
+			    VIRTIO_CONFIG_S_DRIVER_OK)
+				vhost_vdpa_setup_vq_irq(v, idx);
 		} else {
 			cb.callback = NULL;
 			cb.private = NULL;
 			cb.trigger = NULL;
 		}
 		ops->set_vq_cb(vdpa, idx, &cb);
-		vhost_vdpa_setup_vq_irq(v, idx);
 		break;
 
 	case VHOST_SET_VRING_NUM:
@@ -1419,6 +1428,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 	for (i = 0; i < nvqs; i++) {
 		vqs[i] = &v->vqs[i];
 		vqs[i]->handle_kick = handle_vq_kick;
+		vqs[i]->call_ctx.ctx = NULL;
 	}
 	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 		       vhost_vdpa_process_iotlb_msg);
......
@@ -355,6 +355,8 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
 {
 	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned int idx = 0;
+	unsigned int zid;
+	unsigned long stall = 0;
 
 	all_vm_events(events);
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
@@ -363,6 +365,22 @@ static inline unsigned int update_balloon_vm_stats(struct virtio_balloon *vb)
 		    pages_to_bytes(events[PSWPOUT]));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_OOM_KILL, events[OOM_KILL]);
+
+	/* sum all the stall events */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++)
+		stall += events[ALLOCSTALL_NORMAL - ZONE_NORMAL + zid];
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_ALLOC_STALL, stall);
+
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_SCAN,
+		    pages_to_bytes(events[PGSCAN_KSWAPD]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_SCAN,
+		    pages_to_bytes(events[PGSCAN_DIRECT]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_ASYNC_RECLAIM,
+		    pages_to_bytes(events[PGSTEAL_KSWAPD]));
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_DIRECT_RECLAIM,
+		    pages_to_bytes(events[PGSTEAL_DIRECT]));
 
 #ifdef CONFIG_HUGETLB_PAGE
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
......
@@ -56,12 +56,14 @@ struct virtio_fs_vq {
 	bool connected;
 	long in_flight;
 	struct completion in_flight_zero; /* No inflight requests */
+	struct kobject *kobj;
 	char name[VQ_NAME_LEN];
 } ____cacheline_aligned_in_smp;
 
 /* A virtio-fs device instance */
 struct virtio_fs {
 	struct kobject kobj;
+	struct kobject *mqs_kobj;
 	struct list_head list;    /* on virtio_fs_instances */
 	char *tag;
 	struct virtio_fs_vq *vqs;
@@ -200,19 +202,94 @@ static const struct kobj_type virtio_fs_ktype = {
 	.default_groups = virtio_fs_groups,
 };
 
+static struct virtio_fs_vq *virtio_fs_kobj_to_vq(struct virtio_fs *fs,
+						 struct kobject *kobj)
+{
+	int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		if (kobj == fs->vqs[i].kobj)
+			return &fs->vqs[i];
+	}
+	return NULL;
+}
+
+static ssize_t name_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
+	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
+
+	if (!fsvq)
+		return -EINVAL;
+	return sysfs_emit(buf, "%s\n", fsvq->name);
+}
+
+static struct kobj_attribute virtio_fs_vq_name_attr = __ATTR_RO(name);
+
+static ssize_t cpu_list_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	struct virtio_fs *fs = container_of(kobj->parent->parent, struct virtio_fs, kobj);
+	struct virtio_fs_vq *fsvq = virtio_fs_kobj_to_vq(fs, kobj);
+	unsigned int cpu, qid;
+	const size_t size = PAGE_SIZE - 1;
+	bool first = true;
+	int ret = 0, pos = 0;
+
+	if (!fsvq)
+		return -EINVAL;
+
+	qid = fsvq->vq->index;
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+		if (qid < VQ_REQUEST || (fs->mq_map[cpu] == qid - VQ_REQUEST)) {
+			if (first)
+				ret = snprintf(buf + pos, size - pos, "%u", cpu);
+			else
+				ret = snprintf(buf + pos, size - pos, ", %u", cpu);
+
+			if (ret >= size - pos)
+				break;
+			first = false;
+			pos += ret;
+		}
+	}
+	ret = snprintf(buf + pos, size + 1 - pos, "\n");
+	return pos + ret;
+}
+
+static struct kobj_attribute virtio_fs_vq_cpu_list_attr = __ATTR_RO(cpu_list);
+
+static struct attribute *virtio_fs_vq_attrs[] = {
+	&virtio_fs_vq_name_attr.attr,
+	&virtio_fs_vq_cpu_list_attr.attr,
+	NULL
+};
+
+static struct attribute_group virtio_fs_vq_attr_group = {
+	.attrs = virtio_fs_vq_attrs,
+};
+
 /* Make sure virtiofs_mutex is held */
-static void virtio_fs_put(struct virtio_fs *fs)
+static void virtio_fs_put_locked(struct virtio_fs *fs)
 {
+	lockdep_assert_held(&virtio_fs_mutex);
+
 	kobject_put(&fs->kobj);
 }
 
+static void virtio_fs_put(struct virtio_fs *fs)
+{
+	mutex_lock(&virtio_fs_mutex);
+	virtio_fs_put_locked(fs);
+	mutex_unlock(&virtio_fs_mutex);
+}
+
 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
 {
 	struct virtio_fs *vfs = fiq->priv;
 
-	mutex_lock(&virtio_fs_mutex);
 	virtio_fs_put(vfs);
-	mutex_unlock(&virtio_fs_mutex);
 }
 
 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
@@ -273,6 +350,50 @@ static void virtio_fs_start_all_queues(struct virtio_fs *fs)
 	}
 }
 
+static void virtio_fs_delete_queues_sysfs(struct virtio_fs *fs)
+{
+	struct virtio_fs_vq *fsvq;
+	int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		fsvq = &fs->vqs[i];
+		kobject_put(fsvq->kobj);
+	}
+}
+
+static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs)
+{
+	struct virtio_fs_vq *fsvq;
+	char buff[12];
+	int i, j, ret;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		fsvq = &fs->vqs[i];
+
+		sprintf(buff, "%d", i);
+		fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj);
+		if (!fs->mqs_kobj) {
+			ret = -ENOMEM;
+			goto out_del;
+		}
+
+		ret = sysfs_create_group(fsvq->kobj, &virtio_fs_vq_attr_group);
+		if (ret) {
+			kobject_put(fsvq->kobj);
+			goto out_del;
+		}
+	}
+
+	return 0;
+
+out_del:
+	for (j = 0; j < i; j++) {
+		fsvq = &fs->vqs[j];
+		kobject_put(fsvq->kobj);
+	}
+	return ret;
+}
+
 /* Add a new instance to the list or return -EEXIST if tag name exists*/
 static int virtio_fs_add_instance(struct virtio_device *vdev,
 				  struct virtio_fs *fs)
@@ -296,17 +417,22 @@ static int virtio_fs_add_instance(struct virtio_device *vdev,
 	 */
 	fs->kobj.kset = virtio_fs_kset;
 	ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
-	if (ret < 0) {
-		mutex_unlock(&virtio_fs_mutex);
-		return ret;
+	if (ret < 0)
+		goto out_unlock;
+
+	fs->mqs_kobj = kobject_create_and_add("mqs", &fs->kobj);
+	if (!fs->mqs_kobj) {
+		ret = -ENOMEM;
+		goto out_del;
 	}
 
 	ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
-	if (ret < 0) {
-		kobject_del(&fs->kobj);
-		mutex_unlock(&virtio_fs_mutex);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_put;
+
+	ret = virtio_fs_add_queues_sysfs(fs);
+	if (ret)
+		goto out_remove;
 
 	list_add_tail(&fs->list, &virtio_fs_instances);
@@ -315,6 +441,16 @@ static int virtio_fs_add_instance(struct virtio_device *vdev,
 	kobject_uevent(&fs->kobj, KOBJ_ADD);
 
 	return 0;
+
+out_remove:
+	sysfs_remove_link(&fs->kobj, "device");
+out_put:
+	kobject_put(fs->mqs_kobj);
+out_del:
+	kobject_del(&fs->kobj);
+out_unlock:
+	mutex_unlock(&virtio_fs_mutex);
+	return ret;
 }
 
 /* Return the virtio_fs with a given tag, or NULL */
@@ -1043,7 +1179,9 @@ static void virtio_fs_remove(struct virtio_device *vdev)
 	mutex_lock(&virtio_fs_mutex);
 	/* This device is going away. No one should get new reference */
 	list_del_init(&fs->list);
+	virtio_fs_delete_queues_sysfs(fs);
 	sysfs_remove_link(&fs->kobj, "device");
+	kobject_put(fs->mqs_kobj);
 	kobject_del(&fs->kobj);
 	virtio_fs_stop_all_queues(fs);
 	virtio_fs_drain_all_queues_locked(fs);
@@ -1052,7 +1190,7 @@ static void virtio_fs_remove(struct virtio_device *vdev)
 	vdev->priv = NULL;
 	/* Put device reference on virtio_fs object */
-	virtio_fs_put(fs);
+	virtio_fs_put_locked(fs);
 	mutex_unlock(&virtio_fs_mutex);
 }
 
@@ -1581,9 +1719,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 
 out_err:
 	kfree(fc);
-	mutex_lock(&virtio_fs_mutex);
 	virtio_fs_put(fs);
-	mutex_unlock(&virtio_fs_mutex);
 	return err;
 }
......
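The queue information added above is exposed through one kobject per queue under the filesystem instance's "mqs" directory. As a quick way to inspect it, the userspace sketch below walks what should end up as /sys/fs/virtiofs/<instance>/mqs/<qid>/{name,cpu_list}; the exact instance path is an assumption inferred from the virtio_fs kset and may need adjusting on a given system.

/* Userspace sketch: dump the per-queue sysfs entries of virtiofs instance 0.
 * The path layout is an assumption based on the patch above.
 */
#include <stdio.h>

static void dump(const char *path)
{
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f)
		return;		/* queue index does not exist; skip silently */
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(void)
{
	char path[256];
	int qid;

	for (qid = 0; qid < 64; qid++) {
		snprintf(path, sizeof(path), "/sys/fs/virtiofs/0/mqs/%d/name", qid);
		dump(path);
		snprintf(path, sizeof(path), "/sys/fs/virtiofs/0/mqs/%d/cpu_list", qid);
		dump(path);
	}
	return 0;
}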
@@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status);
  * @dev: vdpa device to remove
  * Driver need to remove the specified device by calling
  * _vdpa_unregister_device().
+ * @dev_set_attr: change a vdpa device's attr after it was create
+ *		  @mdev: parent device to use for device
+ *		  @dev: vdpa device structure
+ *		  @config:Attributes to be set for the device.
+ *		  The driver needs to check the mask of the structure and then set
+ *		  the related information to the vdpa device. The driver must return 0
+ *		  if set successfully.
  */
 struct vdpa_mgmtdev_ops {
 	int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name,
 		       const struct vdpa_dev_set_config *config);
 	void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
+	int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev,
+			    const struct vdpa_dev_set_config *config);
 };
 
 /**
......
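To make the new management op concrete, the sketch below shows how a driver could implement and register dev_set_attr following the contract documented above. It is illustrative only, not part of this series (the in-tree user is vdpasim_net_set_attr earlier in this commit); struct my_vdpa_device and the my_* callbacks are hypothetical.

/* Hypothetical driver code, for illustration only. */
struct my_vdpa_device {
	struct vdpa_device vdpa;
	struct virtio_net_config config;
};

static int my_vdpa_dev_set_attr(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev,
				const struct vdpa_dev_set_config *cfg)
{
	struct my_vdpa_device *mydev = container_of(dev, struct my_vdpa_device, vdpa);

	/* Only honor the attributes the core flagged in cfg->mask. */
	if (cfg->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		ether_addr_copy(mydev->config.mac, cfg->net.mac);
		return 0;	/* contract: return 0 on success */
	}

	return -EOPNOTSUPP;
}

static const struct vdpa_mgmtdev_ops my_mgmtdev_ops = {
	.dev_add	= my_vdpa_dev_add,	/* assumed to exist elsewhere */
	.dev_del	= my_vdpa_dev_del,	/* assumed to exist elsewhere */
	.dev_set_attr	= my_vdpa_dev_set_attr,
};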
@@ -19,6 +19,7 @@ enum vdpa_command {
 	VDPA_CMD_DEV_GET,		/* can dump */
 	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
 	VDPA_CMD_DEV_VSTATS_GET,
+	VDPA_CMD_DEV_ATTR_SET,
 };
 
 enum vdpa_attr {
......
@@ -71,7 +71,13 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_CACHES         7  /* Disk caches */
 #define VIRTIO_BALLOON_S_HTLB_PGALLOC   8  /* Hugetlb page allocations */
 #define VIRTIO_BALLOON_S_HTLB_PGFAIL    9  /* Hugetlb page allocation failures */
-#define VIRTIO_BALLOON_S_NR            10
+#define VIRTIO_BALLOON_S_OOM_KILL      10  /* OOM killer invocations */
+#define VIRTIO_BALLOON_S_ALLOC_STALL   11  /* Stall count of memory allocatoin */
+#define VIRTIO_BALLOON_S_ASYNC_SCAN    12  /* Amount of memory scanned asynchronously */
+#define VIRTIO_BALLOON_S_DIRECT_SCAN   13  /* Amount of memory scanned directly */
+#define VIRTIO_BALLOON_S_ASYNC_RECLAIM 14  /* Amount of memory reclaimed asynchronously */
+#define VIRTIO_BALLOON_S_DIRECT_RECLAIM 15 /* Amount of memory reclaimed directly */
+#define VIRTIO_BALLOON_S_NR            16
 
 #define VIRTIO_BALLOON_S_NAMES_WITH_PREFIX(VIRTIO_BALLOON_S_NAMES_prefix) { \
 	VIRTIO_BALLOON_S_NAMES_prefix "swap-in", \
@@ -83,7 +89,13 @@ struct virtio_balloon_config {
 	VIRTIO_BALLOON_S_NAMES_prefix "available-memory", \
 	VIRTIO_BALLOON_S_NAMES_prefix "disk-caches", \
 	VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-allocations", \
-	VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures" \
+	VIRTIO_BALLOON_S_NAMES_prefix "hugetlb-failures", \
+	VIRTIO_BALLOON_S_NAMES_prefix "oom-kills", \
+	VIRTIO_BALLOON_S_NAMES_prefix "alloc-stalls", \
+	VIRTIO_BALLOON_S_NAMES_prefix "async-scans", \
+	VIRTIO_BALLOON_S_NAMES_prefix "direct-scans", \
+	VIRTIO_BALLOON_S_NAMES_prefix "async-reclaims", \
+	VIRTIO_BALLOON_S_NAMES_prefix "direct-reclaims" \
 }
 
 #define VIRTIO_BALLOON_S_NAMES VIRTIO_BALLOON_S_NAMES_WITH_PREFIX("")
......
@@ -94,6 +94,63 @@ static u32 virtio_transport_get_local_cid(void)
 	return ret;
 }
 
+/* Caller need to hold vsock->tx_lock on vq */
+static int virtio_transport_send_skb(struct sk_buff *skb, struct virtqueue *vq,
+				     struct virtio_vsock *vsock)
+{
+	int ret, in_sg = 0, out_sg = 0;
+	struct scatterlist **sgs;
+
+	sgs = vsock->out_sgs;
+	sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
+		    sizeof(*virtio_vsock_hdr(skb)));
+	out_sg++;
+
+	if (!skb_is_nonlinear(skb)) {
+		if (skb->len > 0) {
+			sg_init_one(sgs[out_sg], skb->data, skb->len);
+			out_sg++;
+		}
+	} else {
+		struct skb_shared_info *si;
+		int i;
+
+		/* If skb is nonlinear, then its buffer must contain
+		 * only header and nothing more. Data is stored in
+		 * the fragged part.
+		 */
+		WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
+
+		si = skb_shinfo(skb);
+		for (i = 0; i < si->nr_frags; i++) {
+			skb_frag_t *skb_frag = &si->frags[i];
+			void *va;
+
+			/* We will use 'page_to_virt()' for the userspace page
+			 * here, because virtio or dma-mapping layers will call
+			 * 'virt_to_phys()' later to fill the buffer descriptor.
+			 * We don't touch memory at "virtual" address of this page.
+			 */
+			va = page_to_virt(skb_frag_page(skb_frag));
+			sg_init_one(sgs[out_sg],
+				    va + skb_frag_off(skb_frag),
+				    skb_frag_size(skb_frag));
+			out_sg++;
+		}
+	}
+
+	ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
+	/* Usually this means that there is no more space available in
+	 * the vq
+	 */
+	if (ret < 0)
+		return ret;
+
+	virtio_transport_deliver_tap_pkt(skb);
+
+	return 0;
+}
+
 static void
 virtio_transport_send_pkt_work(struct work_struct *work)
 {
@@ -111,66 +168,22 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
 	for (;;) {
-		int ret, in_sg = 0, out_sg = 0;
-		struct scatterlist **sgs;
 		struct sk_buff *skb;
 		bool reply;
+		int ret;
 
 		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
 		if (!skb)
			break;
 
 		reply = virtio_vsock_skb_reply(skb);
-		sgs = vsock->out_sgs;
-		sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
-			    sizeof(*virtio_vsock_hdr(skb)));
-		out_sg++;
-
-		if (!skb_is_nonlinear(skb)) {
-			if (skb->len > 0) {
-				sg_init_one(sgs[out_sg], skb->data, skb->len);
-				out_sg++;
-			}
-		} else {
-			struct skb_shared_info *si;
-			int i;
-
-			/* If skb is nonlinear, then its buffer must contain
-			 * only header and nothing more. Data is stored in
-			 * the fragged part.
-			 */
-			WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb)));
-
-			si = skb_shinfo(skb);
-			for (i = 0; i < si->nr_frags; i++) {
-				skb_frag_t *skb_frag = &si->frags[i];
-				void *va;
-
-				/* We will use 'page_to_virt()' for the userspace page
-				 * here, because virtio or dma-mapping layers will call
-				 * 'virt_to_phys()' later to fill the buffer descriptor.
-				 * We don't touch memory at "virtual" address of this page.
-				 */
-				va = page_to_virt(skb_frag_page(skb_frag));
-				sg_init_one(sgs[out_sg],
-					    va + skb_frag_off(skb_frag),
-					    skb_frag_size(skb_frag));
-				out_sg++;
-			}
-		}
-
-		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
-		/* Usually this means that there is no more space available in
-		 * the vq
-		 */
+		ret = virtio_transport_send_skb(skb, vq, vsock);
 		if (ret < 0) {
 			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
 			break;
 		}
 
-		virtio_transport_deliver_tap_pkt(skb);
-
 		if (reply) {
 			struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
 			int val;
@@ -195,6 +208,28 @@ virtio_transport_send_pkt_work(struct work_struct *work)
 		queue_work(virtio_vsock_workqueue, &vsock->rx_work);
 }
 
+/* Caller need to hold RCU for vsock.
+ * Returns 0 if the packet is successfully put on the vq.
+ */
+static int virtio_transport_send_skb_fast_path(struct virtio_vsock *vsock, struct sk_buff *skb)
+{
+	struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX];
+	int ret;
+
+	/* Inside RCU, can't sleep! */
+	ret = mutex_trylock(&vsock->tx_lock);
+	if (unlikely(ret == 0))
+		return -EBUSY;
+
+	ret = virtio_transport_send_skb(skb, vq, vsock);
+	if (ret == 0)
+		virtqueue_kick(vq);
+
+	mutex_unlock(&vsock->tx_lock);
+
+	return ret;
+}
+
 static int
 virtio_transport_send_pkt(struct sk_buff *skb)
 {
@@ -218,11 +253,20 @@ virtio_transport_send_pkt(struct sk_buff *skb)
 		goto out_rcu;
 	}
 
-	if (virtio_vsock_skb_reply(skb))
-		atomic_inc(&vsock->queued_replies);
-
-	virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
-	queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+	/* If send_pkt_queue is empty, we can safely bypass this queue
+	 * because packet order is maintained and (try) to put the packet
+	 * on the virtqueue using virtio_transport_send_skb_fast_path.
+	 * If this fails we simply put the packet on the intermediate
+	 * queue and schedule the worker.
+	 */
+	if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) ||
+	    virtio_transport_send_skb_fast_path(vsock, skb)) {
+		if (virtio_vsock_skb_reply(skb))
+			atomic_inc(&vsock->queued_replies);
+
+		virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
+		queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
+	}
 
 out_rcu:
 	rcu_read_unlock();
......
@@ -276,7 +276,7 @@ static void help(void)
 	fprintf(stderr, "Usage: <test> [--help]"
 		" [--host-affinity H]"
 		" [--guest-affinity G]"
-		" [--ring-size R (default: %d)]"
+		" [--ring-size R (default: %u)]"
 		" [--run-cycles C (default: %d)]"
 		" [--batch b]"
 		" [--outstanding o]"
......