Commit 7a53e17a authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:

 - A huge patchset supporting vq resize using the new vq reset
   capability (a usage sketch follows the commit metadata below)

 - Features, fixes, and cleanups all over the place

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (88 commits)
  vdpa/mlx5: Fix possible uninitialized return value
  vdpa_sim_blk: add support for discard and write-zeroes
  vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH
  vdpa_sim_blk: make vdpasim_blk_check_range usable by other requests
  vdpa_sim_blk: check if sector is 0 for commands other than read or write
  vdpa_sim: Implement suspend vdpa op
  vhost-vdpa: uAPI to suspend the device
  vhost-vdpa: introduce SUSPEND backend feature bit
  vdpa: Add suspend operation
  virtio-blk: Avoid use-after-free on suspend/resume
  virtio_vdpa: support the arg sizes of find_vqs()
  vhost-vdpa: Call ida_simple_remove() when failed
  vDPA: fix 'cast to restricted le16' warnings in vdpa.c
  vDPA: !FEATURES_OK should not block querying device config space
  vDPA/ifcvf: support userspace to query features and MQ of a management device
  vDPA/ifcvf: get_config_size should return a value no greater than dev implementation
  vhost scsi: Allow user to control num virtqueues
  vhost-scsi: Fix max number of virtqueues
  vdpa/mlx5: Support different address spaces for control and data
  vdpa/mlx5: Implement susupend virtqueue callback
  ...
parents 999324f5 93e530d2
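Before the diff itself, a quick illustration of the headline feature: vq resize lets a driver shrink or grow a virtqueue at runtime through virtqueue_resize(), which hands back any still-queued buffers via a recycle callback. Below is a minimal sketch of that driver-side pattern, modelled on the virtnet_rx_resize() hunk further down; struct demo_priv and the demo_* names are hypothetical, and only the virtqueue_resize() call shape and callback signature are taken from this diff.

    #include <linux/virtio.h>
    #include <linux/netdevice.h>
    #include <linux/slab.h>

    struct demo_priv {                      /* hypothetical driver state */
            struct virtqueue *vq;
            struct napi_struct napi;
    };

    /* Recycle callback: virtqueue_resize() passes back every buffer still
     * queued so the driver can release it before the ring is reallocated.
     */
    static void demo_recycle(struct virtqueue *vq, void *buf)
    {
            kfree(buf);                     /* demo buffers are plain kmalloc()ed */
    }

    static int demo_resize(struct demo_priv *p, u32 ring_num)
    {
            int err;

            napi_disable(&p->napi);         /* quiesce the queue first */
            err = virtqueue_resize(p->vq, ring_num, demo_recycle);
            /* a real driver would also repost RX buffers here, as
             * virtnet_rx_resize() does with try_fill_recv()
             */
            napi_enable(&p->napi);          /* resume processing */
            return err;
    }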
...@@ -33,6 +33,10 @@ properties: ...@@ -33,6 +33,10 @@ properties:
description: Required for devices making accesses thru an IOMMU. description: Required for devices making accesses thru an IOMMU.
maxItems: 1 maxItems: 1
wakeup-source:
type: boolean
description: Required for setting irq of a virtio_mmio device as wakeup source.
required: required:
- compatible - compatible
- reg - reg
......
...@@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, ...@@ -958,6 +958,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
goto error_create; goto error_create;
} }
vq->priv = info; vq->priv = info;
vq->num_max = num;
num = virtqueue_get_vring_size(vq); num = virtqueue_get_vring_size(vq);
if (vu_dev->protocol_features & if (vu_dev->protocol_features &
...@@ -1010,7 +1011,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, ...@@ -1010,7 +1011,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
......
...@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr) ...@@ -101,6 +101,14 @@ static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
} }
} }
static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
return vq;
}
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr) static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{ {
struct scatterlist hdr, status, *sgs[3]; struct scatterlist hdr, status, *sgs[3];
...@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist) ...@@ -416,7 +424,7 @@ static void virtio_queue_rqs(struct request **rqlist)
struct request *requeue_list = NULL; struct request *requeue_list = NULL;
rq_list_for_each_safe(rqlist, req, next) { rq_list_for_each_safe(rqlist, req, next) {
struct virtio_blk_vq *vq = req->mq_hctx->driver_data; struct virtio_blk_vq *vq = get_virtio_blk_vq(req->mq_hctx);
bool kick; bool kick;
if (!virtblk_prep_rq_batch(req)) { if (!virtblk_prep_rq_batch(req)) {
...@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob) ...@@ -837,7 +845,7 @@ static void virtblk_complete_batch(struct io_comp_batch *iob)
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{ {
struct virtio_blk *vblk = hctx->queue->queuedata; struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = hctx->driver_data; struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
struct virtblk_req *vbr; struct virtblk_req *vbr;
unsigned long flags; unsigned long flags;
unsigned int len; unsigned int len;
...@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) ...@@ -862,22 +870,10 @@ static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
return found; return found;
} }
static int virtblk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct virtio_blk *vblk = data;
struct virtio_blk_vq *vq = &vblk->vqs[hctx_idx];
WARN_ON(vblk->tag_set.tags[hctx_idx] != hctx->tags);
hctx->driver_data = vq;
return 0;
}
static const struct blk_mq_ops virtio_mq_ops = { static const struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq, .queue_rq = virtio_queue_rq,
.queue_rqs = virtio_queue_rqs, .queue_rqs = virtio_queue_rqs,
.commit_rqs = virtio_commit_rqs, .commit_rqs = virtio_commit_rqs,
.init_hctx = virtblk_init_hctx,
.complete = virtblk_request_done, .complete = virtblk_request_done,
.map_queues = virtblk_map_queues, .map_queues = virtblk_map_queues,
.poll = virtblk_poll, .poll = virtblk_poll,
......
...@@ -135,6 +135,9 @@ struct send_queue { ...@@ -135,6 +135,9 @@ struct send_queue {
struct virtnet_sq_stats stats; struct virtnet_sq_stats stats;
struct napi_struct napi; struct napi_struct napi;
/* Record whether sq is in reset state. */
bool reset;
}; };
/* Internal representation of a receive virtqueue */ /* Internal representation of a receive virtqueue */
...@@ -267,6 +270,12 @@ struct virtnet_info { ...@@ -267,6 +270,12 @@ struct virtnet_info {
u8 duplex; u8 duplex;
u32 speed; u32 speed;
/* Interrupt coalescing settings */
u32 tx_usecs;
u32 rx_usecs;
u32 tx_max_packets;
u32 rx_max_packets;
unsigned long guest_offloads; unsigned long guest_offloads;
unsigned long guest_offloads_capable; unsigned long guest_offloads_capable;
...@@ -284,6 +293,9 @@ struct padded_vnet_hdr { ...@@ -284,6 +293,9 @@ struct padded_vnet_hdr {
char padding[12]; char padding[12];
}; };
static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static bool is_xdp_frame(void *ptr) static bool is_xdp_frame(void *ptr)
{ {
return (unsigned long)ptr & VIRTIO_XDP_FLAG; return (unsigned long)ptr & VIRTIO_XDP_FLAG;
...@@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) ...@@ -1628,6 +1640,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
return; return;
if (__netif_tx_trylock(txq)) { if (__netif_tx_trylock(txq)) {
if (sq->reset) {
__netif_tx_unlock(txq);
return;
}
do { do {
virtqueue_disable_cb(sq->vq); virtqueue_disable_cb(sq->vq);
free_old_xmit_skbs(sq, true); free_old_xmit_skbs(sq, true);
...@@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1875,6 +1892,70 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK; return NETDEV_TX_OK;
} }
static int virtnet_rx_resize(struct virtnet_info *vi,
struct receive_queue *rq, u32 ring_num)
{
bool running = netif_running(vi->dev);
int err, qindex;
qindex = rq - vi->rq;
if (running)
napi_disable(&rq->napi);
err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
if (err)
netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
if (running)
virtnet_napi_enable(rq->vq, &rq->napi);
return err;
}
static int virtnet_tx_resize(struct virtnet_info *vi,
struct send_queue *sq, u32 ring_num)
{
bool running = netif_running(vi->dev);
struct netdev_queue *txq;
int err, qindex;
qindex = sq - vi->sq;
if (running)
virtnet_napi_tx_disable(&sq->napi);
txq = netdev_get_tx_queue(vi->dev, qindex);
/* 1. wait for all xmit to complete
* 2. fix the race of netif_stop_subqueue() vs netif_start_subqueue()
*/
__netif_tx_lock_bh(txq);
/* Prevent rx poll from accessing sq. */
sq->reset = true;
/* Prevent the upper layer from trying to send packets. */
netif_stop_subqueue(vi->dev, qindex);
__netif_tx_unlock_bh(txq);
err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
if (err)
netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
__netif_tx_lock_bh(txq);
sq->reset = false;
netif_tx_wake_queue(txq);
__netif_tx_unlock_bh(txq);
if (running)
virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
return err;
}
/* /*
* Send command via the control virtqueue and check status. Commands * Send command via the control virtqueue and check status. Commands
* supported by the hypervisor, as indicated by feature bits, should * supported by the hypervisor, as indicated by feature bits, should
...@@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev, ...@@ -2285,10 +2366,57 @@ static void virtnet_get_ringparam(struct net_device *dev,
{ {
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq); ring->rx_max_pending = vi->rq[0].vq->num_max;
ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq); ring->tx_max_pending = vi->sq[0].vq->num_max;
ring->rx_pending = ring->rx_max_pending; ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
ring->tx_pending = ring->tx_max_pending; ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
}
static int virtnet_set_ringparam(struct net_device *dev,
struct ethtool_ringparam *ring,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
u32 rx_pending, tx_pending;
struct receive_queue *rq;
struct send_queue *sq;
int i, err;
if (ring->rx_mini_pending || ring->rx_jumbo_pending)
return -EINVAL;
rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
if (ring->rx_pending == rx_pending &&
ring->tx_pending == tx_pending)
return 0;
if (ring->rx_pending > vi->rq[0].vq->num_max)
return -EINVAL;
if (ring->tx_pending > vi->sq[0].vq->num_max)
return -EINVAL;
for (i = 0; i < vi->max_queue_pairs; i++) {
rq = vi->rq + i;
sq = vi->sq + i;
if (ring->tx_pending != tx_pending) {
err = virtnet_tx_resize(vi, sq, ring->tx_pending);
if (err)
return err;
}
if (ring->rx_pending != rx_pending) {
err = virtnet_rx_resize(vi, rq, ring->rx_pending);
if (err)
return err;
}
}
return 0;
} }
static bool virtnet_commit_rss_command(struct virtnet_info *vi) static bool virtnet_commit_rss_command(struct virtnet_info *vi)
...@@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, ...@@ -2618,27 +2746,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
return 0; return 0;
} }
static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
struct scatterlist sgs_tx, sgs_rx;
struct virtio_net_ctrl_coal_tx coal_tx;
struct virtio_net_ctrl_coal_rx coal_rx;
coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
&sgs_tx))
return -EINVAL;
/* Save parameters */
vi->tx_usecs = ec->tx_coalesce_usecs;
vi->tx_max_packets = ec->tx_max_coalesced_frames;
coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
&sgs_rx))
return -EINVAL;
/* Save parameters */
vi->rx_usecs = ec->rx_coalesce_usecs;
vi->rx_max_packets = ec->rx_max_coalesced_frames;
return 0;
}
static int virtnet_coal_params_supported(struct ethtool_coalesce *ec)
{
/* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL
* feature is negotiated.
*/
if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs)
return -EOPNOTSUPP;
if (ec->tx_max_coalesced_frames > 1 ||
ec->rx_max_coalesced_frames != 1)
return -EINVAL;
return 0;
}
static int virtnet_set_coalesce(struct net_device *dev, static int virtnet_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec, struct ethtool_coalesce *ec,
struct kernel_ethtool_coalesce *kernel_coal, struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
int i, napi_weight; int ret, i, napi_weight;
bool update_napi = false;
if (ec->tx_max_coalesced_frames > 1 ||
ec->rx_max_coalesced_frames != 1)
return -EINVAL;
/* Can't change NAPI weight if the link is up */
napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0;
if (napi_weight ^ vi->sq[0].napi.weight) { if (napi_weight ^ vi->sq[0].napi.weight) {
if (dev->flags & IFF_UP) if (dev->flags & IFF_UP)
return -EBUSY; return -EBUSY;
else
update_napi = true;
}
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL))
ret = virtnet_send_notf_coal_cmds(vi, ec);
else
ret = virtnet_coal_params_supported(ec);
if (ret)
return ret;
if (update_napi) {
for (i = 0; i < vi->max_queue_pairs; i++) for (i = 0; i < vi->max_queue_pairs; i++)
vi->sq[i].napi.weight = napi_weight; vi->sq[i].napi.weight = napi_weight;
} }
return 0; return ret;
} }
static int virtnet_get_coalesce(struct net_device *dev, static int virtnet_get_coalesce(struct net_device *dev,
...@@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev, ...@@ -2646,16 +2836,19 @@ static int virtnet_get_coalesce(struct net_device *dev,
struct kernel_ethtool_coalesce *kernel_coal, struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct ethtool_coalesce ec_default = {
.cmd = ETHTOOL_GCOALESCE,
.rx_max_coalesced_frames = 1,
};
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
memcpy(ec, &ec_default, sizeof(ec_default)); if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
ec->rx_coalesce_usecs = vi->rx_usecs;
ec->tx_coalesce_usecs = vi->tx_usecs;
ec->tx_max_coalesced_frames = vi->tx_max_packets;
ec->rx_max_coalesced_frames = vi->rx_max_packets;
} else {
ec->rx_max_coalesced_frames = 1;
if (vi->sq[0].napi.weight) if (vi->sq[0].napi.weight)
ec->tx_max_coalesced_frames = 1; ec->tx_max_coalesced_frames = 1;
}
return 0; return 0;
} }
...@@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) ...@@ -2774,10 +2967,12 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
} }
static const struct ethtool_ops virtnet_ethtool_ops = { static const struct ethtool_ops virtnet_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
ETHTOOL_COALESCE_USECS,
.get_drvinfo = virtnet_get_drvinfo, .get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link, .get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam, .get_ringparam = virtnet_get_ringparam,
.set_ringparam = virtnet_set_ringparam,
.get_strings = virtnet_get_strings, .get_strings = virtnet_get_strings,
.get_sset_count = virtnet_get_sset_count, .get_sset_count = virtnet_get_sset_count,
.get_ethtool_stats = virtnet_get_ethtool_stats, .get_ethtool_stats = virtnet_get_ethtool_stats,
...@@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi) ...@@ -3171,6 +3366,27 @@ static void free_receive_page_frags(struct virtnet_info *vi)
put_page(vi->rq[i].alloc_frag.page); put_page(vi->rq[i].alloc_frag.page);
} }
static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
{
if (!is_xdp_frame(buf))
dev_kfree_skb(buf);
else
xdp_return_frame(ptr_to_xdp(buf));
}
static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf)
{
struct virtnet_info *vi = vq->vdev->priv;
int i = vq2rxq(vq);
if (vi->mergeable_rx_bufs)
put_page(virt_to_head_page(buf));
else if (vi->big_packets)
give_pages(&vi->rq[i], buf);
else
put_page(virt_to_head_page(buf));
}
static void free_unused_bufs(struct virtnet_info *vi) static void free_unused_bufs(struct virtnet_info *vi)
{ {
void *buf; void *buf;
...@@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi) ...@@ -3178,26 +3394,14 @@ static void free_unused_bufs(struct virtnet_info *vi)
for (i = 0; i < vi->max_queue_pairs; i++) { for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->sq[i].vq; struct virtqueue *vq = vi->sq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
if (!is_xdp_frame(buf)) virtnet_sq_free_unused_buf(vq, buf);
dev_kfree_skb(buf);
else
xdp_return_frame(ptr_to_xdp(buf));
}
} }
for (i = 0; i < vi->max_queue_pairs; i++) { for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->rq[i].vq; struct virtqueue *vq = vi->rq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { virtnet_rq_free_unused_buf(vq, buf);
if (vi->mergeable_rx_bufs) {
put_page(virt_to_head_page(buf));
} else if (vi->big_packets) {
give_pages(&vi->rq[i], buf);
} else {
put_page(virt_to_head_page(buf));
}
}
} }
} }
...@@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu ...@@ -3228,6 +3432,29 @@ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqu
(unsigned int)GOOD_PACKET_LEN); (unsigned int)GOOD_PACKET_LEN);
} }
static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)
{
u32 i, rx_size, tx_size;
if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_10000) {
rx_size = 1024;
tx_size = 1024;
} else if (vi->speed < SPEED_40000) {
rx_size = 1024 * 4;
tx_size = 1024 * 4;
} else {
rx_size = 1024 * 8;
tx_size = 1024 * 8;
}
for (i = 0; i < vi->max_queue_pairs; i++) {
sizes[rxq2vq(i)] = rx_size;
sizes[txq2vq(i)] = tx_size;
}
}
static int virtnet_find_vqs(struct virtnet_info *vi) static int virtnet_find_vqs(struct virtnet_info *vi)
{ {
vq_callback_t **callbacks; vq_callback_t **callbacks;
...@@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ...@@ -3235,6 +3462,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
int ret = -ENOMEM; int ret = -ENOMEM;
int i, total_vqs; int i, total_vqs;
const char **names; const char **names;
u32 *sizes;
bool *ctx; bool *ctx;
/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
...@@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ...@@ -3262,10 +3490,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx = NULL; ctx = NULL;
} }
sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);
if (!sizes)
goto err_sizes;
/* Parameters for control virtqueue, if any */ /* Parameters for control virtqueue, if any */
if (vi->has_cvq) { if (vi->has_cvq) {
callbacks[total_vqs - 1] = NULL; callbacks[total_vqs - 1] = NULL;
names[total_vqs - 1] = "control"; names[total_vqs - 1] = "control";
sizes[total_vqs - 1] = 64;
} }
/* Allocate/initialize parameters for send/receive virtqueues */ /* Allocate/initialize parameters for send/receive virtqueues */
...@@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ...@@ -3280,8 +3513,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx[rxq2vq(i)] = true; ctx[rxq2vq(i)] = true;
} }
ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks, virtnet_config_sizes(vi, sizes);
names, ctx, NULL);
ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
names, sizes, ctx, NULL);
if (ret) if (ret)
goto err_find; goto err_find;
...@@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi) ...@@ -3301,6 +3536,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
err_find: err_find:
kfree(sizes);
err_sizes:
kfree(ctx); kfree(ctx);
err_ctx: err_ctx:
kfree(names); kfree(names);
...@@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) ...@@ -3444,6 +3681,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
"VIRTIO_NET_F_CTRL_VQ") || "VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
"VIRTIO_NET_F_CTRL_VQ") ||
VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL,
"VIRTIO_NET_F_CTRL_VQ"))) { "VIRTIO_NET_F_CTRL_VQ"))) {
return false; return false;
} }
...@@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev) ...@@ -3580,6 +3819,13 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true; vi->mergeable_rx_bufs = true;
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) {
vi->rx_usecs = 0;
vi->tx_usecs = 0;
vi->tx_max_packets = 0;
vi->rx_max_packets = 0;
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true; vi->has_rss_hash_report = true;
...@@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev) ...@@ -3651,6 +3897,9 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->curr_queue_pairs = num_online_cpus(); vi->curr_queue_pairs = num_online_cpus();
vi->max_queue_pairs = max_queue_pairs; vi->max_queue_pairs = max_queue_pairs;
virtnet_init_settings(dev);
virtnet_update_settings(vi);
/* Allocate/initialize the rx/tx queues, and invoke find_vqs */ /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi); err = init_vqs(vi);
if (err) if (err)
...@@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev) ...@@ -3663,8 +3912,6 @@ static int virtnet_probe(struct virtio_device *vdev)
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs); netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
virtnet_init_settings(dev);
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
vi->failover = net_failover_create(vi->dev); vi->failover = net_failover_create(vi->dev);
if (IS_ERR(vi->failover)) { if (IS_ERR(vi->failover)) {
...@@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = { ...@@ -3814,7 +4061,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
static unsigned int features[] = { static unsigned int features[] = {
VIRTNET_FEATURES, VIRTNET_FEATURES,
......
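The virtio_net changes above also exercise the reworked find_vqs() interface, which now takes a per-virtqueue size array. Below is a hedged sketch of that calling convention, using virtio_find_vqs_ctx_size() with the same argument order as the virtnet_find_vqs() hunk; the two-queue layout, names and sizes are made up for illustration, and the transport may still reduce the requested sizes (as the virtio_ccw hunk later in this diff notes).

    #include <linux/virtio.h>
    #include <linux/virtio_config.h>

    /* Request specific ring sizes at probe time; callbacks are left NULL
     * to keep the sketch short.
     */
    static int demo_find_vqs(struct virtio_device *vdev, struct virtqueue *vqs[2])
    {
            vq_callback_t *callbacks[2] = { NULL, NULL };
            static const char * const names[2] = { "demo-rx", "demo-tx" };
            u32 sizes[2] = { 1024, 1024 };  /* requested ring sizes */

            return virtio_find_vqs_ctx_size(vdev, 2, vqs, callbacks,
                                            names, sizes, NULL, NULL);
    }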
...@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev) ...@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
ndr_desc.res = &res; ndr_desc.res = &res;
ndr_desc.numa_node = nid; ndr_desc.numa_node = nid;
ndr_desc.flush = async_pmem_flush; ndr_desc.flush = async_pmem_flush;
ndr_desc.provider_data = vdev;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
set_bit(ND_REGION_ASYNC, &ndr_desc.flags); set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
/*
* The NVDIMM region could be available before the
* virtio_device_ready() that is called by
* virtio_dev_probe(), so we set device ready here.
*/
virtio_device_ready(vdev);
nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
if (!nd_region) { if (!nd_region) {
dev_err(&vdev->dev, "failed to create nvdimm region\n"); dev_err(&vdev->dev, "failed to create nvdimm region\n");
err = -ENXIO; err = -ENXIO;
goto out_nd; goto out_nd;
} }
nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
return 0; return 0;
out_nd: out_nd:
virtio_reset_device(vdev);
nvdimm_bus_unregister(vpmem->nvdimm_bus); nvdimm_bus_unregister(vpmem->nvdimm_bus);
out_vq: out_vq:
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
......
...@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, ...@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -959,6 +960,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev, ...@@ -959,6 +960,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
goto error; goto error;
} }
vq->num_max = vring->num;
vqs[i] = vq; vqs[i] = vq;
vring->vq = vq; vring->vq = vq;
vq->priv = vring; vq->priv = vring;
......
...@@ -335,7 +335,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) ...@@ -335,7 +335,7 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
size_t size; size_t size;
/* actual size of vring (in bytes) */ /* actual size of vring (in bytes) */
size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); size = PAGE_ALIGN(vring_size(rvring->num, rvring->align));
rsc = (void *)rproc->table_ptr + rvdev->rsc_offset; rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
...@@ -402,7 +402,7 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i) ...@@ -402,7 +402,7 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
return -EINVAL; return -EINVAL;
} }
rvring->len = vring->num; rvring->num = vring->num;
rvring->align = vring->align; rvring->align = vring->align;
rvring->rvdev = rvdev; rvring->rvdev = rvdev;
......
...@@ -87,7 +87,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, ...@@ -87,7 +87,7 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
struct fw_rsc_vdev *rsc; struct fw_rsc_vdev *rsc;
struct virtqueue *vq; struct virtqueue *vq;
void *addr; void *addr;
int len, size; int num, size;
/* we're temporarily limited to two virtqueues per rvdev */ /* we're temporarily limited to two virtqueues per rvdev */
if (id >= ARRAY_SIZE(rvdev->vring)) if (id >= ARRAY_SIZE(rvdev->vring))
...@@ -104,20 +104,20 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, ...@@ -104,20 +104,20 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
rvring = &rvdev->vring[id]; rvring = &rvdev->vring[id];
addr = mem->va; addr = mem->va;
len = rvring->len; num = rvring->num;
/* zero vring */ /* zero vring */
size = vring_size(len, rvring->align); size = vring_size(num, rvring->align);
memset(addr, 0, size); memset(addr, 0, size);
dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n", dev_dbg(dev, "vring%d: va %pK qsz %d notifyid %d\n",
id, addr, len, rvring->notifyid); id, addr, num, rvring->notifyid);
/* /*
* Create the new vq, and tell virtio we're not interested in * Create the new vq, and tell virtio we're not interested in
* the 'weak' smp barriers, since we're talking with a real device. * the 'weak' smp barriers, since we're talking with a real device.
*/ */
vq = vring_new_virtqueue(id, len, rvring->align, vdev, false, ctx, vq = vring_new_virtqueue(id, num, rvring->align, vdev, false, ctx,
addr, rproc_virtio_notify, callback, name); addr, rproc_virtio_notify, callback, name);
if (!vq) { if (!vq) {
dev_err(dev, "vring_new_virtqueue %s failed\n", name); dev_err(dev, "vring_new_virtqueue %s failed\n", name);
...@@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, ...@@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
vq->num_max = num;
rvring->vq = vq; rvring->vq = vq;
vq->priv = rvring; vq->priv = rvring;
...@@ -156,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -156,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool * ctx, const bool * ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
......
...@@ -532,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, ...@@ -532,6 +532,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
err = -ENOMEM; err = -ENOMEM;
goto out_err; goto out_err;
} }
vq->num_max = info->num;
/* it may have been reduced */ /* it may have been reduced */
info->num = virtqueue_get_vring_size(vq); info->num = virtqueue_get_vring_size(vq);
...@@ -634,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -634,6 +637,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
......
...@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector) ...@@ -29,7 +29,6 @@ u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
{ {
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
cfg = hw->common_cfg;
vp_iowrite16(vector, &cfg->msix_config); vp_iowrite16(vector, &cfg->msix_config);
return vp_ioread16(&cfg->msix_config); return vp_ioread16(&cfg->msix_config);
...@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) ...@@ -128,6 +127,7 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
break; break;
case VIRTIO_PCI_CAP_DEVICE_CFG: case VIRTIO_PCI_CAP_DEVICE_CFG:
hw->dev_cfg = get_cap_addr(hw, &cap); hw->dev_cfg = get_cap_addr(hw, &cap);
hw->cap_dev_config_size = le32_to_cpu(cap.length);
IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg); IFCVF_DBG(pdev, "hw->dev_cfg = %p\n", hw->dev_cfg);
break; break;
} }
...@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features) ...@@ -233,15 +233,23 @@ int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
u32 ifcvf_get_config_size(struct ifcvf_hw *hw) u32 ifcvf_get_config_size(struct ifcvf_hw *hw)
{ {
struct ifcvf_adapter *adapter; struct ifcvf_adapter *adapter;
u32 net_config_size = sizeof(struct virtio_net_config);
u32 blk_config_size = sizeof(struct virtio_blk_config);
u32 cap_size = hw->cap_dev_config_size;
u32 config_size; u32 config_size;
adapter = vf_to_adapter(hw); adapter = vf_to_adapter(hw);
/* If the onboard device config space size is greater than
 * the size of struct virtio_net/blk_config, only the size of
 * the spec-defined contents is returned; this is very
 * unlikely, but handled as defensive programming.
 */
switch (hw->dev_type) { switch (hw->dev_type) {
case VIRTIO_ID_NET: case VIRTIO_ID_NET:
config_size = sizeof(struct virtio_net_config); config_size = min(cap_size, net_config_size);
break; break;
case VIRTIO_ID_BLOCK: case VIRTIO_ID_BLOCK:
config_size = sizeof(struct virtio_blk_config); config_size = min(cap_size, blk_config_size);
break; break;
default: default:
config_size = 0; config_size = 0;
......
...@@ -87,6 +87,8 @@ struct ifcvf_hw { ...@@ -87,6 +87,8 @@ struct ifcvf_hw {
int config_irq; int config_irq;
int vqs_reused_irq; int vqs_reused_irq;
u16 nr_vring; u16 nr_vring;
/* VIRTIO_PCI_CAP_DEVICE_CFG size */
u32 cap_dev_config_size;
}; };
struct ifcvf_adapter { struct ifcvf_adapter {
......
...@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic ...@@ -685,7 +685,7 @@ static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_devic
} }
/* /*
* IFCVF currently does't have on-chip IOMMU, so not * IFCVF currently doesn't have on-chip IOMMU, so not
* implemented set_map()/dma_map()/dma_unmap() * implemented set_map()/dma_map()/dma_unmap()
*/ */
static const struct vdpa_config_ops ifc_vdpa_ops = { static const struct vdpa_config_ops ifc_vdpa_ops = {
...@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, ...@@ -752,59 +752,36 @@ static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
{ {
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct ifcvf_adapter *adapter; struct ifcvf_adapter *adapter;
struct vdpa_device *vdpa_dev;
struct pci_dev *pdev; struct pci_dev *pdev;
struct ifcvf_hw *vf; struct ifcvf_hw *vf;
struct device *dev; int ret;
int ret, i;
ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev); ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
if (ifcvf_mgmt_dev->adapter) if (!ifcvf_mgmt_dev->adapter)
return -EOPNOTSUPP; return -EOPNOTSUPP;
pdev = ifcvf_mgmt_dev->pdev; adapter = ifcvf_mgmt_dev->adapter;
dev = &pdev->dev;
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops, 1, 1, name, false);
if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return PTR_ERR(adapter);
}
ifcvf_mgmt_dev->adapter = adapter;
vf = &adapter->vf; vf = &adapter->vf;
vf->dev_type = get_dev_type(pdev); pdev = adapter->pdev;
vf->base = pcim_iomap_table(pdev); vdpa_dev = &adapter->vdpa;
adapter->pdev = pdev; if (name)
adapter->vdpa.dma_dev = &pdev->dev; ret = dev_set_name(&vdpa_dev->dev, "%s", name);
else
ret = ifcvf_init_hw(vf, pdev); ret = dev_set_name(&vdpa_dev->dev, "vdpa%u", vdpa_dev->index);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
goto err;
}
for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
vf->config_size = ifcvf_get_config_size(vf);
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring); ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
if (ret) { if (ret) {
put_device(&adapter->vdpa.dev);
IFCVF_ERR(pdev, "Failed to register to vDPA bus"); IFCVF_ERR(pdev, "Failed to register to vDPA bus");
goto err; return ret;
} }
return 0; return 0;
err:
put_device(&adapter->vdpa.dev);
return ret;
} }
static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{ {
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
...@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -823,61 +800,94 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{ {
struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
struct device *dev = &pdev->dev; struct device *dev = &pdev->dev;
struct ifcvf_adapter *adapter;
struct ifcvf_hw *vf;
u32 dev_type; u32 dev_type;
int ret; int ret, i;
ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
if (!ifcvf_mgmt_dev) {
IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
return -ENOMEM;
}
dev_type = get_dev_type(pdev);
switch (dev_type) {
case VIRTIO_ID_NET:
ifcvf_mgmt_dev->mdev.id_table = id_table_net;
break;
case VIRTIO_ID_BLOCK:
ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
break;
default:
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
ret = -EOPNOTSUPP;
goto err;
}
ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
ifcvf_mgmt_dev->mdev.device = dev;
ifcvf_mgmt_dev->pdev = pdev;
ret = pcim_enable_device(pdev); ret = pcim_enable_device(pdev);
if (ret) { if (ret) {
IFCVF_ERR(pdev, "Failed to enable device\n"); IFCVF_ERR(pdev, "Failed to enable device\n");
goto err; return ret;
} }
ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
IFCVF_DRIVER_NAME); IFCVF_DRIVER_NAME);
if (ret) { if (ret) {
IFCVF_ERR(pdev, "Failed to request MMIO region\n"); IFCVF_ERR(pdev, "Failed to request MMIO region\n");
goto err; return ret;
} }
ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret) { if (ret) {
IFCVF_ERR(pdev, "No usable DMA configuration\n"); IFCVF_ERR(pdev, "No usable DMA configuration\n");
goto err; return ret;
} }
ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
if (ret) { if (ret) {
IFCVF_ERR(pdev, IFCVF_ERR(pdev,
"Failed for adding devres for freeing irq vectors\n"); "Failed for adding devres for freeing irq vectors\n");
goto err; return ret;
} }
pci_set_master(pdev); pci_set_master(pdev);
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
dev, &ifc_vdpa_ops, 1, 1, NULL, false);
if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return PTR_ERR(adapter);
}
vf = &adapter->vf;
vf->dev_type = get_dev_type(pdev);
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
adapter->vdpa.dma_dev = &pdev->dev;
ret = ifcvf_init_hw(vf, pdev);
if (ret) {
IFCVF_ERR(pdev, "Failed to init IFCVF hw\n");
return ret;
}
for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
vf->config_size = ifcvf_get_config_size(vf);
ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
if (!ifcvf_mgmt_dev) {
IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
return -ENOMEM;
}
ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
ifcvf_mgmt_dev->mdev.device = dev;
ifcvf_mgmt_dev->adapter = adapter;
dev_type = get_dev_type(pdev);
switch (dev_type) {
case VIRTIO_ID_NET:
ifcvf_mgmt_dev->mdev.id_table = id_table_net;
break;
case VIRTIO_ID_BLOCK:
ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
break;
default:
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
ret = -EOPNOTSUPP;
goto err;
}
ifcvf_mgmt_dev->mdev.max_supported_vqs = vf->nr_vring;
ifcvf_mgmt_dev->mdev.supported_features = vf->hw_features;
adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev); ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
if (ret) { if (ret) {
IFCVF_ERR(pdev, IFCVF_ERR(pdev,
......
...@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent { ...@@ -70,6 +70,16 @@ struct mlx5_vdpa_wq_ent {
struct mlx5_vdpa_dev *mvdev; struct mlx5_vdpa_dev *mvdev;
}; };
enum {
MLX5_VDPA_DATAVQ_GROUP,
MLX5_VDPA_CVQ_GROUP,
MLX5_VDPA_NUMVQ_GROUPS
};
enum {
MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS
};
struct mlx5_vdpa_dev { struct mlx5_vdpa_dev {
struct vdpa_device vdev; struct vdpa_device vdev;
struct mlx5_core_dev *mdev; struct mlx5_core_dev *mdev;
...@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev { ...@@ -85,6 +95,7 @@ struct mlx5_vdpa_dev {
struct mlx5_vdpa_mr mr; struct mlx5_vdpa_mr mr;
struct mlx5_control_vq cvq; struct mlx5_control_vq cvq;
struct workqueue_struct *wq; struct workqueue_struct *wq;
unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
}; };
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
......
...@@ -164,6 +164,7 @@ struct mlx5_vdpa_net { ...@@ -164,6 +164,7 @@ struct mlx5_vdpa_net {
bool setup; bool setup;
u32 cur_num_vqs; u32 cur_num_vqs;
u32 rqt_size; u32 rqt_size;
bool nb_registered;
struct notifier_block nb; struct notifier_block nb;
struct vdpa_callback config_cb; struct vdpa_callback config_cb;
struct mlx5_vdpa_wq_ent cvq_ent; struct mlx5_vdpa_wq_ent cvq_ent;
...@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque ...@@ -895,6 +896,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
if (err) if (err)
goto err_cmd; goto err_cmd;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
kfree(in); kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
...@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq ...@@ -922,6 +924,7 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
return; return;
} }
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq); umems_destroy(ndev, mvq);
} }
...@@ -1121,6 +1124,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu ...@@ -1121,6 +1124,20 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu
return err; return err;
} }
static bool is_valid_state_change(int oldstate, int newstate)
{
switch (oldstate) {
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
default:
return false;
}
}
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{ {
int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
...@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque ...@@ -1130,6 +1147,12 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
void *in; void *in;
int err; int err;
if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
return 0;
if (!is_valid_state_change(mvq->fw_state, state))
return -EINVAL;
in = kzalloc(inlen, GFP_KERNEL); in = kzalloc(inlen, GFP_KERNEL);
if (!in) if (!in)
return -ENOMEM; return -ENOMEM;
...@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, ...@@ -1440,7 +1463,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
memset(dmac_c, 0xff, ETH_ALEN); eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac); ether_addr_copy(dmac_v, mac);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
if (tagged) { if (tagged) {
...@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready ...@@ -1992,6 +2015,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq; struct mlx5_vdpa_virtqueue *mvq;
int err;
if (!mvdev->actual_features) if (!mvdev->actual_features)
return; return;
...@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready ...@@ -2005,8 +2029,16 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
} }
mvq = &ndev->vqs[idx]; mvq = &ndev->vqs[idx];
if (!ready) if (!ready) {
suspend_vq(ndev, mvq); suspend_vq(ndev, mvq);
} else {
err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
if (err) {
mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
ready = false;
}
}
mvq->ready = ready; mvq->ready = ready;
} }
...@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) ...@@ -2095,9 +2127,14 @@ static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
return PAGE_SIZE; return PAGE_SIZE;
} }
static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdpa, u16 idx) static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
{ {
return 0; struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
if (is_ctrl_vq_idx(mvdev, idx))
return MLX5_VDPA_CVQ_GROUP;
return MLX5_VDPA_DATAVQ_GROUP;
} }
enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
...@@ -2511,6 +2548,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) ...@@ -2511,6 +2548,15 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
up_write(&ndev->reslock); up_write(&ndev->reslock);
} }
static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
{
int i;
/* By default, all groups are mapped to ASID 0 */
for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
mvdev->group2asid[i] = 0;
}
static int mlx5_vdpa_reset(struct vdpa_device *vdev) static int mlx5_vdpa_reset(struct vdpa_device *vdev)
{ {
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
...@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) ...@@ -2529,7 +2575,9 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
ndev->mvdev.cvq.completed_desc = 0; ndev->mvdev.cvq.completed_desc = 0;
memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
ndev->mvdev.actual_features = 0; ndev->mvdev.actual_features = 0;
init_group_to_asid_map(mvdev);
++mvdev->generation; ++mvdev->generation;
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_mr(mvdev, NULL)) if (mlx5_vdpa_create_mr(mvdev, NULL))
mlx5_vdpa_warn(mvdev, "create MR failed\n"); mlx5_vdpa_warn(mvdev, "create MR failed\n");
...@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) ...@@ -2567,26 +2615,63 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
return mvdev->generation; return mvdev->generation;
} }
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
struct vhost_iotlb *iotlb) {
u64 start = 0ULL, last = 0ULL - 1;
struct vhost_iotlb_map *map;
int err = 0;
spin_lock(&mvdev->cvq.iommu_lock);
vhost_iotlb_reset(mvdev->cvq.iotlb);
for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) {
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start,
map->last, map->addr, map->perm);
if (err)
goto out;
}
out:
spin_unlock(&mvdev->cvq.iommu_lock);
return err;
}
static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
bool change_map; bool change_map;
int err; int err;
down_write(&ndev->reslock);
err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
if (err) { if (err) {
mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
goto err; return err;
} }
if (change_map) if (change_map)
err = mlx5_vdpa_change_map(mvdev, iotlb); err = mlx5_vdpa_change_map(mvdev, iotlb);
err: return err;
}
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err = -EINVAL;
down_write(&ndev->reslock);
if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
err = set_map_data(mvdev, iotlb);
if (err)
goto out;
}
if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
err = set_map_control(mvdev, iotlb);
out:
up_write(&ndev->reslock); up_write(&ndev->reslock);
return err; return err;
} }
...@@ -2733,6 +2818,49 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, ...@@ -2733,6 +2818,49 @@ static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
return err; return err;
} }
static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_control_vq *cvq;
if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
return;
cvq = &mvdev->cvq;
cvq->ready = false;
}
static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
int i;
down_write(&ndev->reslock);
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
flush_workqueue(ndev->mvdev.wq);
for (i = 0; i < ndev->cur_num_vqs; i++) {
mvq = &ndev->vqs[i];
suspend_vq(ndev, mvq);
}
mlx5_vdpa_cvq_suspend(mvdev);
up_write(&ndev->reslock);
return 0;
}
static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
unsigned int asid)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
if (group >= MLX5_VDPA_NUMVQ_GROUPS)
return -EINVAL;
mvdev->group2asid[group] = asid;
return 0;
}
static const struct vdpa_config_ops mlx5_vdpa_ops = { static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_vq_address = mlx5_vdpa_set_vq_address, .set_vq_address = mlx5_vdpa_set_vq_address,
.set_vq_num = mlx5_vdpa_set_vq_num, .set_vq_num = mlx5_vdpa_set_vq_num,
...@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { ...@@ -2762,7 +2890,9 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.set_config = mlx5_vdpa_set_config, .set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation, .get_generation = mlx5_vdpa_get_generation,
.set_map = mlx5_vdpa_set_map, .set_map = mlx5_vdpa_set_map,
.set_group_asid = mlx5_set_group_asid,
.free = mlx5_vdpa_free, .free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
}; };
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
...@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev) ...@@ -2828,6 +2958,7 @@ static void init_mvqs(struct mlx5_vdpa_net *ndev)
mvq->index = i; mvq->index = i;
mvq->ndev = ndev; mvq->ndev = ndev;
mvq->fwqp.fw = true; mvq->fwqp.fw = true;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
} }
for (; i < ndev->mvdev.max_vqs; i++) { for (; i < ndev->mvdev.max_vqs; i++) {
mvq = &ndev->vqs[i]; mvq = &ndev->vqs[i];
...@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p ...@@ -2902,13 +3033,21 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
switch (eqe->sub_type) { switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
down_read(&ndev->reslock);
if (!ndev->nb_registered) {
up_read(&ndev->reslock);
return NOTIFY_DONE;
}
wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
if (!wqent) if (!wqent) {
up_read(&ndev->reslock);
return NOTIFY_DONE; return NOTIFY_DONE;
}
wqent->mvdev = &ndev->mvdev; wqent->mvdev = &ndev->mvdev;
INIT_WORK(&wqent->work, update_carrier); INIT_WORK(&wqent->work, update_carrier);
queue_work(ndev->mvdev.wq, &wqent->work); queue_work(ndev->mvdev.wq, &wqent->work);
up_read(&ndev->reslock);
ret = NOTIFY_OK; ret = NOTIFY_OK;
break; break;
default: default:
...@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ...@@ -2982,7 +3121,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
} }
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
1, 1, name, false); MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
if (IS_ERR(ndev)) if (IS_ERR(ndev))
return PTR_ERR(ndev); return PTR_ERR(ndev);
...@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ...@@ -3062,6 +3201,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
ndev->nb.notifier_call = event_handler; ndev->nb.notifier_call = event_handler;
mlx5_notifier_register(mdev, &ndev->nb); mlx5_notifier_register(mdev, &ndev->nb);
ndev->nb_registered = true;
mvdev->vdev.mdev = &mgtdev->mgtdev; mvdev->vdev.mdev = &mgtdev->mgtdev;
err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
if (err) if (err)
...@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * ...@@ -3093,7 +3233,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct workqueue_struct *wq; struct workqueue_struct *wq;
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); if (ndev->nb_registered) {
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
ndev->nb_registered = false;
}
wq = mvdev->wq; wq = mvdev->wq;
mvdev->wq = NULL; mvdev->wq = NULL;
destroy_workqueue(wq); destroy_workqueue(wq);
......
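The .suspend vdpa_config_ops callback wired up above reaches userspace through vhost-vdpa, per the shortlog entries "vhost-vdpa: introduce SUSPEND backend feature bit" and "vhost-vdpa: uAPI to suspend the device". A hedged sketch of the expected userspace call follows; the VHOST_VDPA_SUSPEND name is assumed from those subjects rather than taken from this diff, so check <linux/vhost.h> from the same series for the exact definition.

    #include <sys/ioctl.h>
    #include <linux/vhost.h>

    /* Ask the vhost-vdpa device to stop processing virtqueues while
     * preserving its state (assumed ioctl name, see note above).
     */
    static int demo_suspend(int vhost_vdpa_fd)
    {
            return ioctl(vhost_vdpa_fd, VHOST_VDPA_SUSPEND);
    }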
...@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms ...@@ -824,11 +824,11 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
config.mac)) config.mac))
return -EMSGSIZE; return -EMSGSIZE;
val_u16 = le16_to_cpu(config.status); val_u16 = __virtio16_to_cpu(true, config.status);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16)) if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
return -EMSGSIZE; return -EMSGSIZE;
val_u16 = le16_to_cpu(config.mtu); val_u16 = __virtio16_to_cpu(true, config.mtu);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
return -EMSGSIZE; return -EMSGSIZE;
...@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, ...@@ -846,17 +846,9 @@ vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid,
{ {
u32 device_id; u32 device_id;
void *hdr; void *hdr;
u8 status;
int err; int err;
down_read(&vdev->cf_lock); down_read(&vdev->cf_lock);
status = vdev->config->get_status(vdev);
if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
NL_SET_ERR_MSG_MOD(extack, "Features negotiation not completed");
err = -EAGAIN;
goto out;
}
hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
VDPA_CMD_DEV_CONFIG_GET); VDPA_CMD_DEV_CONFIG_GET);
if (!hdr) { if (!hdr) {
...@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, ...@@ -913,7 +905,7 @@ static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg,
} }
vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config));
max_vqp = le16_to_cpu(config.max_virtqueue_pairs); max_vqp = __virtio16_to_cpu(true, config.max_virtqueue_pairs);
if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp)) if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, max_vqp))
return -EMSGSIZE; return -EMSGSIZE;
......
...@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); ...@@ -33,7 +33,7 @@ MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
static int max_iotlb_entries = 2048; static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444); module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries, MODULE_PARM_DESC(max_iotlb_entries,
"Maximum number of iotlb entries. 0 means unlimited. (default: 2048)"); "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
#define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
#define VDPASIM_QUEUE_MAX 256 #define VDPASIM_QUEUE_MAX 256
...@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) ...@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
for (i = 0; i < vdpasim->dev_attr.nas; i++) for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(&vdpasim->iommu[i]); vhost_iotlb_reset(&vdpasim->iommu[i]);
vdpasim->running = true;
spin_unlock(&vdpasim->iommu_lock); spin_unlock(&vdpasim->iommu_lock);
vdpasim->features = 0; vdpasim->features = 0;
...@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) ...@@ -291,7 +292,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
goto err_iommu; goto err_iommu;
for (i = 0; i < vdpasim->dev_attr.nas; i++) for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_init(&vdpasim->iommu[i], 0, 0); vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
if (!vdpasim->buffer) if (!vdpasim->buffer)
...@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa) ...@@ -505,6 +506,17 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
return 0; return 0;
} }
static int vdpasim_suspend(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
spin_lock(&vdpasim->lock);
vdpasim->running = false;
spin_unlock(&vdpasim->lock);
return 0;
}
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa) static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{ {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
...@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { ...@@ -694,6 +706,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status, .get_status = vdpasim_get_status,
.set_status = vdpasim_set_status, .set_status = vdpasim_set_status,
.reset = vdpasim_reset, .reset = vdpasim_reset,
.suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size, .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config, .get_config = vdpasim_get_config,
.set_config = vdpasim_set_config, .set_config = vdpasim_set_config,
...@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { ...@@ -726,6 +739,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status, .get_status = vdpasim_get_status,
.set_status = vdpasim_set_status, .set_status = vdpasim_set_status,
.reset = vdpasim_reset, .reset = vdpasim_reset,
.suspend = vdpasim_suspend,
.get_config_size = vdpasim_get_config_size, .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config, .get_config = vdpasim_get_config,
.set_config = vdpasim_set_config, .set_config = vdpasim_set_config,
......
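Both vdpasim config_ops tables gain the new .suspend operation; its contract is simply to stop the datapath while preserving all virtqueue and config state (the simulator's work functions check the running flag before touching any ring, as the later hunks show). A minimal sketch of how another parent driver might wire it up; the my_* names are invented:

#include <linux/spinlock.h>
#include <linux/vdpa.h>

struct my_vdpa {
	struct vdpa_device vdpa;
	spinlock_t lock;
	bool running;			/* tested by the datapath workers */
};

/* Sketch: stop processing descriptors, keep state for later inspection. */
static int my_vdpa_suspend(struct vdpa_device *vdpa)
{
	struct my_vdpa *d = container_of(vdpa, struct my_vdpa, vdpa);

	spin_lock(&d->lock);
	d->running = false;
	spin_unlock(&d->lock);
	return 0;
}

static const struct vdpa_config_ops my_config_ops = {
	/* ...get_vq_ready, set_status, reset, etc. as usual... */
	.suspend	= my_vdpa_suspend,
};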
...@@ -66,6 +66,7 @@ struct vdpasim { ...@@ -66,6 +66,7 @@ struct vdpasim {
u32 generation; u32 generation;
u64 features; u64 features;
u32 groups; u32 groups;
bool running;
/* spinlock to synchronize iommu table */ /* spinlock to synchronize iommu table */
spinlock_t iommu_lock; spinlock_t iommu_lock;
}; };
......
...@@ -25,31 +25,49 @@ ...@@ -25,31 +25,49 @@
#define DRV_LICENSE "GPL v2" #define DRV_LICENSE "GPL v2"
#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \ #define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \
(1ULL << VIRTIO_BLK_F_FLUSH) | \
(1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
(1ULL << VIRTIO_BLK_F_SEG_MAX) | \ (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
(1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
(1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
(1ULL << VIRTIO_BLK_F_MQ)) (1ULL << VIRTIO_BLK_F_MQ) | \
(1ULL << VIRTIO_BLK_F_DISCARD) | \
(1ULL << VIRTIO_BLK_F_WRITE_ZEROES))
#define VDPASIM_BLK_CAPACITY 0x40000 #define VDPASIM_BLK_CAPACITY 0x40000
#define VDPASIM_BLK_SIZE_MAX 0x1000 #define VDPASIM_BLK_SIZE_MAX 0x1000
#define VDPASIM_BLK_SEG_MAX 32 #define VDPASIM_BLK_SEG_MAX 32
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX
/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM 1 #define VDPASIM_BLK_VQ_NUM 1
#define VDPASIM_BLK_AS_NUM 1
#define VDPASIM_BLK_GROUP_NUM 1
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size) static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
u64 num_sectors, u64 max_sectors)
{ {
u64 range_sectors = range_size >> SECTOR_SHIFT; if (start_sector > VDPASIM_BLK_CAPACITY) {
dev_dbg(&vdpasim->vdpa.dev,
if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX) "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
return false; start_sector, VDPASIM_BLK_CAPACITY);
}
if (start_sector > VDPASIM_BLK_CAPACITY) if (num_sectors > max_sectors) {
dev_dbg(&vdpasim->vdpa.dev,
"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
num_sectors, max_sectors);
return false; return false;
}
if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector) if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
dev_dbg(&vdpasim->vdpa.dev,
"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
return false; return false;
}
return true; return true;
} }
...@@ -63,6 +81,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -63,6 +81,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
{ {
size_t pushed = 0, to_pull, to_push; size_t pushed = 0, to_pull, to_push;
struct virtio_blk_outhdr hdr; struct virtio_blk_outhdr hdr;
bool handled = false;
ssize_t bytes; ssize_t bytes;
loff_t offset; loff_t offset;
u64 sector; u64 sector;
...@@ -76,14 +95,14 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -76,14 +95,14 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
return false; return false;
if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n", dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
vq->out_iov.used, vq->in_iov.used); vq->out_iov.used, vq->in_iov.used);
return false; goto err;
} }
if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) { if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
dev_err(&vdpasim->vdpa.dev, "request in header too short\n"); dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
return false; goto err;
} }
/* The last byte is the status and we checked if the last iov has /* The last byte is the status and we checked if the last iov has
...@@ -96,8 +115,8 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -96,8 +115,8 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr, bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
sizeof(hdr)); sizeof(hdr));
if (bytes != sizeof(hdr)) { if (bytes != sizeof(hdr)) {
dev_err(&vdpasim->vdpa.dev, "request out header too short\n"); dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
return false; goto err;
} }
to_pull -= bytes; to_pull -= bytes;
...@@ -107,12 +126,20 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -107,12 +126,20 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
offset = sector << SECTOR_SHIFT; offset = sector << SECTOR_SHIFT;
status = VIRTIO_BLK_S_OK; status = VIRTIO_BLK_S_OK;
if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
sector != 0) {
dev_dbg(&vdpasim->vdpa.dev,
"sector must be 0 for %u request - sector: 0x%llx\n",
type, sector);
status = VIRTIO_BLK_S_IOERR;
goto err_status;
}
switch (type) { switch (type) {
case VIRTIO_BLK_T_IN: case VIRTIO_BLK_T_IN:
if (!vdpasim_blk_check_range(sector, to_push)) { if (!vdpasim_blk_check_range(vdpasim, sector,
dev_err(&vdpasim->vdpa.dev, to_push >> SECTOR_SHIFT,
"reading over the capacity - offset: 0x%llx len: 0x%zx\n", VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
offset, to_push);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
break; break;
} }
...@@ -121,7 +148,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -121,7 +148,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim->buffer + offset, vdpasim->buffer + offset,
to_push); to_push);
if (bytes < 0) { if (bytes < 0) {
dev_err(&vdpasim->vdpa.dev, dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
bytes, offset, to_push); bytes, offset, to_push);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
...@@ -132,10 +159,9 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -132,10 +159,9 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
break; break;
case VIRTIO_BLK_T_OUT: case VIRTIO_BLK_T_OUT:
if (!vdpasim_blk_check_range(sector, to_pull)) { if (!vdpasim_blk_check_range(vdpasim, sector,
dev_err(&vdpasim->vdpa.dev, to_pull >> SECTOR_SHIFT,
"writing over the capacity - offset: 0x%llx len: 0x%zx\n", VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
offset, to_pull);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
break; break;
} }
...@@ -144,7 +170,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -144,7 +170,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim->buffer + offset, vdpasim->buffer + offset,
to_pull); to_pull);
if (bytes < 0) { if (bytes < 0) {
dev_err(&vdpasim->vdpa.dev, dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
bytes, offset, to_pull); bytes, offset, to_pull);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
...@@ -157,7 +183,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -157,7 +183,7 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
vdpasim_blk_id, vdpasim_blk_id,
VIRTIO_BLK_ID_BYTES); VIRTIO_BLK_ID_BYTES);
if (bytes < 0) { if (bytes < 0) {
dev_err(&vdpasim->vdpa.dev, dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_push_iotlb() error: %zd\n", bytes); "vringh_iov_push_iotlb() error: %zd\n", bytes);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
break; break;
...@@ -166,13 +192,76 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -166,13 +192,76 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
pushed += bytes; pushed += bytes;
break; break;
case VIRTIO_BLK_T_FLUSH:
/* nothing to do */
break;
case VIRTIO_BLK_T_DISCARD:
case VIRTIO_BLK_T_WRITE_ZEROES: {
struct virtio_blk_discard_write_zeroes range;
u32 num_sectors, flags;
if (to_pull != sizeof(range)) {
dev_dbg(&vdpasim->vdpa.dev,
"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
to_pull, sizeof(range));
status = VIRTIO_BLK_S_IOERR;
break;
}
bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
to_pull);
if (bytes < 0) {
dev_dbg(&vdpasim->vdpa.dev,
"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
bytes, offset, to_pull);
status = VIRTIO_BLK_S_IOERR;
break;
}
sector = le64_to_cpu(range.sector);
offset = sector << SECTOR_SHIFT;
num_sectors = le32_to_cpu(range.num_sectors);
flags = le32_to_cpu(range.flags);
if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
dev_dbg(&vdpasim->vdpa.dev,
"discard unexpected flags set - flags: 0x%x\n",
flags);
status = VIRTIO_BLK_S_UNSUPP;
break;
}
if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
dev_dbg(&vdpasim->vdpa.dev,
"write_zeroes unexpected flags set - flags: 0x%x\n",
flags);
status = VIRTIO_BLK_S_UNSUPP;
break;
}
if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
VDPASIM_BLK_DWZ_MAX_SECTORS)) {
status = VIRTIO_BLK_S_IOERR;
break;
}
if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
memset(vdpasim->buffer + offset, 0,
num_sectors << SECTOR_SHIFT);
}
break;
}
default: default:
dev_warn(&vdpasim->vdpa.dev, dev_dbg(&vdpasim->vdpa.dev,
"Unsupported request type %d\n", type); "Unsupported request type %d\n", type);
status = VIRTIO_BLK_S_IOERR; status = VIRTIO_BLK_S_IOERR;
break; break;
} }
err_status:
/* If some operations fail, we need to skip the remaining bytes /* If some operations fail, we need to skip the remaining bytes
* to put the status in the last byte * to put the status in the last byte
*/ */
...@@ -182,21 +271,25 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, ...@@ -182,21 +271,25 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
/* Last byte is the status */ /* Last byte is the status */
bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1); bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
if (bytes != 1) if (bytes != 1)
return false; goto err;
pushed += bytes; pushed += bytes;
/* Make sure data is written before advancing index */ /* Make sure data is written before advancing index */
smp_wmb(); smp_wmb();
handled = true;
err:
vringh_complete_iotlb(&vq->vring, vq->head, pushed); vringh_complete_iotlb(&vq->vring, vq->head, pushed);
return true; return handled;
} }
static void vdpasim_blk_work(struct work_struct *work) static void vdpasim_blk_work(struct work_struct *work)
{ {
struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
bool reschedule = false;
int i; int i;
spin_lock(&vdpasim->lock); spin_lock(&vdpasim->lock);
...@@ -204,8 +297,12 @@ static void vdpasim_blk_work(struct work_struct *work) ...@@ -204,8 +297,12 @@ static void vdpasim_blk_work(struct work_struct *work)
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out; goto out;
if (!vdpasim->running)
goto out;
for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) { for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
struct vdpasim_virtqueue *vq = &vdpasim->vqs[i]; struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
int reqs = 0;
if (!vq->ready) if (!vq->ready)
continue; continue;
...@@ -218,10 +315,18 @@ static void vdpasim_blk_work(struct work_struct *work) ...@@ -218,10 +315,18 @@ static void vdpasim_blk_work(struct work_struct *work)
if (vringh_need_notify_iotlb(&vq->vring) > 0) if (vringh_need_notify_iotlb(&vq->vring) > 0)
vringh_notify(&vq->vring); vringh_notify(&vq->vring);
local_bh_enable(); local_bh_enable();
if (++reqs > 4) {
reschedule = true;
break;
}
} }
} }
out: out:
spin_unlock(&vdpasim->lock); spin_unlock(&vdpasim->lock);
if (reschedule)
schedule_work(&vdpasim->work);
} }
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
...@@ -237,6 +342,17 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) ...@@ -237,6 +342,17 @@ static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1); blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1); blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
/* VIRTIO_BLK_F_DISCARD */
blk_config->discard_sector_alignment =
cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
blk_config->max_discard_sectors =
cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
/* VIRTIO_BLK_F_WRITE_ZEROES */
blk_config->max_write_zeroes_sectors =
cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
} }
static void vdpasim_blk_mgmtdev_release(struct device *dev) static void vdpasim_blk_mgmtdev_release(struct device *dev)
...@@ -260,6 +376,8 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, ...@@ -260,6 +376,8 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
dev_attr.id = VIRTIO_ID_BLOCK; dev_attr.id = VIRTIO_ID_BLOCK;
dev_attr.supported_features = VDPASIM_BLK_FEATURES; dev_attr.supported_features = VDPASIM_BLK_FEATURES;
dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
dev_attr.nas = VDPASIM_BLK_AS_NUM;
dev_attr.config_size = sizeof(struct virtio_blk_config); dev_attr.config_size = sizeof(struct virtio_blk_config);
dev_attr.get_config = vdpasim_blk_get_config; dev_attr.get_config = vdpasim_blk_get_config;
dev_attr.work_fn = vdpasim_blk_work; dev_attr.work_fn = vdpasim_blk_work;
......
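With VIRTIO_BLK_F_DISCARD and VIRTIO_BLK_F_WRITE_ZEROES offered, the simulator expects these requests in the standard virtio-blk layout: an out header whose sector field must be 0, followed by exactly one struct virtio_blk_discard_write_zeroes range (flags must be 0 for discard; only the UNMAP flag is accepted for write-zeroes), and finally the device-writable status byte. A guest-side sketch of filling such a request with the real <linux/virtio_blk.h> definitions; queue setup and the descriptor split (header+range out, status in) are assumed to exist:

#include <linux/virtio_blk.h>
#include <linux/virtio_config.h>

/* Sketch: a discard request as vdpasim_blk_handle_req() parses it.
 * In a real driver the status byte is a separate device-writable
 * descriptor; it is shown inline here only for clarity.
 */
struct my_discard_req {
	struct virtio_blk_outhdr hdr;			/* driver -> device */
	struct virtio_blk_discard_write_zeroes range;	/* driver -> device */
	u8 status;					/* device -> driver */
};

static void my_fill_discard(struct virtio_device *vdev,
			    struct my_discard_req *req,
			    u64 start_sector, u32 nr_sectors)
{
	req->hdr.type   = cpu_to_virtio32(vdev, VIRTIO_BLK_T_DISCARD);
	req->hdr.ioprio = cpu_to_virtio32(vdev, 0);
	req->hdr.sector = cpu_to_virtio64(vdev, 0);	/* must be 0 for non-IN/OUT requests */

	/* The range descriptor is always little-endian per the virtio spec. */
	req->range.sector      = cpu_to_le64(start_sector);
	req->range.num_sectors = cpu_to_le32(nr_sectors);
	req->range.flags       = cpu_to_le32(0);	/* no flags allowed for DISCARD */
}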
...@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work) ...@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
spin_lock(&vdpasim->lock); spin_lock(&vdpasim->lock);
if (!vdpasim->running)
goto out;
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out; goto out;
......
...@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size, ...@@ -138,18 +138,17 @@ static void do_bounce(phys_addr_t orig, void *addr, size_t size,
{ {
unsigned long pfn = PFN_DOWN(orig); unsigned long pfn = PFN_DOWN(orig);
unsigned int offset = offset_in_page(orig); unsigned int offset = offset_in_page(orig);
char *buffer; struct page *page;
unsigned int sz = 0; unsigned int sz = 0;
while (size) { while (size) {
sz = min_t(size_t, PAGE_SIZE - offset, size); sz = min_t(size_t, PAGE_SIZE - offset, size);
buffer = kmap_atomic(pfn_to_page(pfn)); page = pfn_to_page(pfn);
if (dir == DMA_TO_DEVICE) if (dir == DMA_TO_DEVICE)
memcpy(addr, buffer + offset, sz); memcpy_from_page(addr, page, offset, sz);
else else
memcpy(buffer + offset, addr, sz); memcpy_to_page(page, offset, addr, sz);
kunmap_atomic(buffer);
size -= sz; size -= sz;
pfn++; pfn++;
...@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain, ...@@ -179,8 +178,9 @@ static void vduse_domain_bounce(struct vduse_iova_domain *domain,
map->orig_phys == INVALID_PHYS_ADDR)) map->orig_phys == INVALID_PHYS_ADDR))
return; return;
addr = page_address(map->bounce_page) + offset; addr = kmap_local_page(map->bounce_page);
do_bounce(map->orig_phys + offset, addr, sz, dir); do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
kunmap_local(addr);
size -= sz; size -= sz;
iova += sz; iova += sz;
} }
...@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) ...@@ -213,21 +213,21 @@ vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
struct vduse_bounce_map *map; struct vduse_bounce_map *map;
struct page *page = NULL; struct page *page = NULL;
spin_lock(&domain->iotlb_lock); read_lock(&domain->bounce_lock);
map = &domain->bounce_maps[iova >> PAGE_SHIFT]; map = &domain->bounce_maps[iova >> PAGE_SHIFT];
if (!map->bounce_page) if (domain->user_bounce_pages || !map->bounce_page)
goto out; goto out;
page = map->bounce_page; page = map->bounce_page;
get_page(page); get_page(page);
out: out:
spin_unlock(&domain->iotlb_lock); read_unlock(&domain->bounce_lock);
return page; return page;
} }
static void static void
vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain) vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{ {
struct vduse_bounce_map *map; struct vduse_bounce_map *map;
unsigned long pfn, bounce_pfns; unsigned long pfn, bounce_pfns;
...@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain) ...@@ -247,6 +247,73 @@ vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
} }
} }
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
struct page **pages, int count)
{
struct vduse_bounce_map *map;
int i, ret;
/* Partial mapping is not supported for now */
if (count != (domain->bounce_size >> PAGE_SHIFT))
return -EINVAL;
write_lock(&domain->bounce_lock);
ret = -EEXIST;
if (domain->user_bounce_pages)
goto out;
for (i = 0; i < count; i++) {
map = &domain->bounce_maps[i];
if (map->bounce_page) {
/* Copy kernel page to user page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR)
memcpy_to_page(pages[i], 0,
page_address(map->bounce_page),
PAGE_SIZE);
__free_page(map->bounce_page);
}
map->bounce_page = pages[i];
get_page(pages[i]);
}
domain->user_bounce_pages = true;
ret = 0;
out:
write_unlock(&domain->bounce_lock);
return ret;
}
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
struct vduse_bounce_map *map;
unsigned long i, count;
write_lock(&domain->bounce_lock);
if (!domain->user_bounce_pages)
goto out;
count = domain->bounce_size >> PAGE_SHIFT;
for (i = 0; i < count; i++) {
struct page *page = NULL;
map = &domain->bounce_maps[i];
if (WARN_ON(!map->bounce_page))
continue;
/* Copy user page to kernel page if it's in use */
if (map->orig_phys != INVALID_PHYS_ADDR) {
page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
memcpy_from_page(page_address(page),
map->bounce_page, 0, PAGE_SIZE);
}
put_page(map->bounce_page);
map->bounce_page = page;
}
domain->user_bounce_pages = false;
out:
write_unlock(&domain->bounce_lock);
}
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain) void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{ {
if (!domain->bounce_map) if (!domain->bounce_map)
...@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, ...@@ -322,13 +389,18 @@ dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
if (vduse_domain_init_bounce_map(domain)) if (vduse_domain_init_bounce_map(domain))
goto err; goto err;
read_lock(&domain->bounce_lock);
if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
goto err; goto err_unlock;
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
read_unlock(&domain->bounce_lock);
return iova; return iova;
err_unlock:
read_unlock(&domain->bounce_lock);
err: err:
vduse_domain_free_iova(iovad, iova, size); vduse_domain_free_iova(iovad, iova, size);
return DMA_MAPPING_ERROR; return DMA_MAPPING_ERROR;
...@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain, ...@@ -340,10 +412,12 @@ void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
{ {
struct iova_domain *iovad = &domain->stream_iovad; struct iova_domain *iovad = &domain->stream_iovad;
read_lock(&domain->bounce_lock);
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
read_unlock(&domain->bounce_lock);
vduse_domain_free_iova(iovad, dma_addr, size); vduse_domain_free_iova(iovad, dma_addr, size);
} }
...@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file) ...@@ -451,7 +525,8 @@ static int vduse_domain_release(struct inode *inode, struct file *file)
spin_lock(&domain->iotlb_lock); spin_lock(&domain->iotlb_lock);
vduse_iotlb_del_range(domain, 0, ULLONG_MAX); vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
vduse_domain_free_bounce_pages(domain); vduse_domain_remove_user_bounce_pages(domain);
vduse_domain_free_kernel_bounce_pages(domain);
spin_unlock(&domain->iotlb_lock); spin_unlock(&domain->iotlb_lock);
put_iova_domain(&domain->stream_iovad); put_iova_domain(&domain->stream_iovad);
put_iova_domain(&domain->consistent_iovad); put_iova_domain(&domain->consistent_iovad);
...@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size) ...@@ -511,6 +586,7 @@ vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
goto err_file; goto err_file;
domain->file = file; domain->file = file;
rwlock_init(&domain->bounce_lock);
spin_lock_init(&domain->iotlb_lock); spin_lock_init(&domain->iotlb_lock);
init_iova_domain(&domain->stream_iovad, init_iova_domain(&domain->stream_iovad,
PAGE_SIZE, IOVA_START_PFN); PAGE_SIZE, IOVA_START_PFN);
......
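Two things change in the bounce path above: the copies now go through memcpy_from_page()/memcpy_to_page() and kmap_local_page() instead of kmap_atomic()/page_address(), which keeps working when the bounce page is a user-supplied page without a permanent kernel mapping, and the bounce map gains a bounce_lock so page ownership can flip between kernel and user pages. A minimal sketch of the per-page copy helper, mirroring do_bounce() above:

#include <linux/highmem.h>
#include <linux/dma-direction.h>

/* Sketch: copy one chunk between the original page and the bounce buffer;
 * the page may lack a permanent kernel mapping, so use the page helpers.
 */
static void my_bounce_one(struct page *orig_page, unsigned int offset,
			  void *bounce_addr, size_t sz,
			  enum dma_data_direction dir)
{
	if (dir == DMA_TO_DEVICE)
		memcpy_from_page(bounce_addr, orig_page, offset, sz);	/* orig -> bounce */
	else
		memcpy_to_page(orig_page, offset, bounce_addr, sz);	/* bounce -> orig */
}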
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/iova.h> #include <linux/iova.h>
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/vhost_iotlb.h> #include <linux/vhost_iotlb.h>
#include <linux/rwlock.h>
#define IOVA_START_PFN 1 #define IOVA_START_PFN 1
...@@ -34,6 +35,8 @@ struct vduse_iova_domain { ...@@ -34,6 +35,8 @@ struct vduse_iova_domain {
struct vhost_iotlb *iotlb; struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock; spinlock_t iotlb_lock;
struct file *file; struct file *file;
bool user_bounce_pages;
rwlock_t bounce_lock;
}; };
int vduse_domain_set_map(struct vduse_iova_domain *domain, int vduse_domain_set_map(struct vduse_iova_domain *domain,
...@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, ...@@ -61,6 +64,11 @@ void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain); void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
struct page **pages, int count);
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain);
void vduse_domain_destroy(struct vduse_iova_domain *domain); void vduse_domain_destroy(struct vduse_iova_domain *domain);
struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit, struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
......
...@@ -21,6 +21,8 @@ ...@@ -21,6 +21,8 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/vdpa.h> #include <linux/vdpa.h>
#include <linux/nospec.h> #include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <uapi/linux/vduse.h> #include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h> #include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h> #include <uapi/linux/virtio_config.h>
...@@ -64,6 +66,13 @@ struct vduse_vdpa { ...@@ -64,6 +66,13 @@ struct vduse_vdpa {
struct vduse_dev *dev; struct vduse_dev *dev;
}; };
struct vduse_umem {
unsigned long iova;
unsigned long npages;
struct page **pages;
struct mm_struct *mm;
};
struct vduse_dev { struct vduse_dev {
struct vduse_vdpa *vdev; struct vduse_vdpa *vdev;
struct device *dev; struct device *dev;
...@@ -95,6 +104,8 @@ struct vduse_dev { ...@@ -95,6 +104,8 @@ struct vduse_dev {
u8 status; u8 status;
u32 vq_num; u32 vq_num;
u32 vq_align; u32 vq_align;
struct vduse_umem *umem;
struct mutex mem_lock;
}; };
struct vduse_dev_msg { struct vduse_dev_msg {
...@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev, ...@@ -917,6 +928,102 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
return ret; return ret;
} }
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
u64 iova, u64 size)
{
int ret;
mutex_lock(&dev->mem_lock);
ret = -ENOENT;
if (!dev->umem)
goto unlock;
ret = -EINVAL;
if (dev->umem->iova != iova || size != dev->domain->bounce_size)
goto unlock;
vduse_domain_remove_user_bounce_pages(dev->domain);
unpin_user_pages_dirty_lock(dev->umem->pages,
dev->umem->npages, true);
atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
mmdrop(dev->umem->mm);
vfree(dev->umem->pages);
kfree(dev->umem);
dev->umem = NULL;
ret = 0;
unlock:
mutex_unlock(&dev->mem_lock);
return ret;
}
static int vduse_dev_reg_umem(struct vduse_dev *dev,
u64 iova, u64 uaddr, u64 size)
{
struct page **page_list = NULL;
struct vduse_umem *umem = NULL;
long pinned = 0;
unsigned long npages, lock_limit;
int ret;
if (!dev->domain->bounce_map ||
size != dev->domain->bounce_size ||
iova != 0 || uaddr & ~PAGE_MASK)
return -EINVAL;
mutex_lock(&dev->mem_lock);
ret = -EEXIST;
if (dev->umem)
goto unlock;
ret = -ENOMEM;
npages = size >> PAGE_SHIFT;
page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
GFP_KERNEL_ACCOUNT);
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!page_list || !umem)
goto unlock;
mmap_read_lock(current->mm);
lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
goto out;
pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
page_list, NULL);
if (pinned != npages) {
ret = pinned < 0 ? pinned : -ENOMEM;
goto out;
}
ret = vduse_domain_add_user_bounce_pages(dev->domain,
page_list, pinned);
if (ret)
goto out;
atomic64_add(npages, &current->mm->pinned_vm);
umem->pages = page_list;
umem->npages = pinned;
umem->iova = iova;
umem->mm = current->mm;
mmgrab(current->mm);
dev->umem = umem;
out:
if (ret && pinned > 0)
unpin_user_pages(page_list, pinned);
mmap_read_unlock(current->mm);
unlock:
if (ret) {
vfree(page_list);
kfree(umem);
}
mutex_unlock(&dev->mem_lock);
return ret;
}
static long vduse_dev_ioctl(struct file *file, unsigned int cmd, static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
...@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, ...@@ -1089,6 +1196,77 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
break; break;
} }
case VDUSE_IOTLB_REG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_reg_umem(dev, umem.iova,
umem.uaddr, umem.size);
break;
}
case VDUSE_IOTLB_DEREG_UMEM: {
struct vduse_iova_umem umem;
ret = -EFAULT;
if (copy_from_user(&umem, argp, sizeof(umem)))
break;
ret = -EINVAL;
if (!is_mem_zero((const char *)umem.reserved,
sizeof(umem.reserved)))
break;
ret = vduse_dev_dereg_umem(dev, umem.iova,
umem.size);
break;
}
case VDUSE_IOTLB_GET_INFO: {
struct vduse_iova_info info;
struct vhost_iotlb_map *map;
struct vduse_iova_domain *domain = dev->domain;
ret = -EFAULT;
if (copy_from_user(&info, argp, sizeof(info)))
break;
ret = -EINVAL;
if (info.start > info.last)
break;
if (!is_mem_zero((const char *)info.reserved,
sizeof(info.reserved)))
break;
spin_lock(&domain->iotlb_lock);
map = vhost_iotlb_itree_first(domain->iotlb,
info.start, info.last);
if (map) {
info.start = map->start;
info.last = map->last;
info.capability = 0;
if (domain->bounce_map && map->start == 0 &&
map->last == domain->bounce_size - 1)
info.capability |= VDUSE_IOVA_CAP_UMEM;
}
spin_unlock(&domain->iotlb_lock);
if (!map)
break;
ret = -EFAULT;
if (copy_to_user(argp, &info, sizeof(info)))
break;
ret = 0;
break;
}
default: default:
ret = -ENOIOCTLCMD; ret = -ENOIOCTLCMD;
break; break;
...@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file) ...@@ -1101,6 +1279,7 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
{ {
struct vduse_dev *dev = file->private_data; struct vduse_dev *dev = file->private_data;
vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
spin_lock(&dev->msg_lock); spin_lock(&dev->msg_lock);
/* Make sure the inflight messages can be processed after reconnection */ /* Make sure the inflight messages can be processed after reconnection */
list_splice_init(&dev->recv_list, &dev->send_list); list_splice_init(&dev->recv_list, &dev->send_list);
...@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void) ...@@ -1163,6 +1342,7 @@ static struct vduse_dev *vduse_dev_create(void)
return NULL; return NULL;
mutex_init(&dev->lock); mutex_init(&dev->lock);
mutex_init(&dev->mem_lock);
spin_lock_init(&dev->msg_lock); spin_lock_init(&dev->msg_lock);
INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->send_list);
INIT_LIST_HEAD(&dev->recv_list); INIT_LIST_HEAD(&dev->recv_list);
......
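The new ioctls let a VDUSE daemon swap the kernel-allocated bounce pages for its own memory: VDUSE_IOTLB_GET_INFO reports whether a region is the bounce map (VDUSE_IOVA_CAP_UMEM), VDUSE_IOTLB_REG_UMEM pins the user buffer and installs it (whole region only, iova 0, page-aligned address), and VDUSE_IOTLB_DEREG_UMEM or closing the fd undoes it. A hedged userspace sketch against an already-open VDUSE device fd; the struct fields and ioctl names are the ones visible in the hunks above, and error handling is minimal:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vduse.h>

/* Sketch: register an anonymous mapping as the device's bounce memory.
 * Returns the mapping on success, NULL on failure.
 */
static void *vduse_setup_umem(int dev_fd, size_t *out_size)
{
	struct vduse_iova_info info = { .start = 0, .last = ~0ULL };
	struct vduse_iova_umem umem = {};
	size_t size;
	void *buf;

	if (ioctl(dev_fd, VDUSE_IOTLB_GET_INFO, &info))
		return NULL;
	if (!(info.capability & VDUSE_IOVA_CAP_UMEM))
		return NULL;				/* region is not the bounce map */

	size = info.last - info.start + 1;		/* kernel accepts only a full mapping */
	buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return NULL;

	umem.uaddr = (uintptr_t)buf;
	umem.iova  = info.start;			/* must be 0 for the bounce map */
	umem.size  = size;
	if (ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem)) {
		munmap(buf, size);
		return NULL;
	}

	*out_size = size;
	return buf;
}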
...@@ -159,9 +159,13 @@ enum { ...@@ -159,9 +159,13 @@ enum {
}; };
#define VHOST_SCSI_MAX_TARGET 256 #define VHOST_SCSI_MAX_TARGET 256
#define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_IO_VQ 1024
#define VHOST_SCSI_MAX_EVENT 128 #define VHOST_SCSI_MAX_EVENT 128
static unsigned vhost_scsi_max_io_vqs = 128;
module_param_named(max_io_vqs, vhost_scsi_max_io_vqs, uint, 0644);
MODULE_PARM_DESC(max_io_vqs, "Set the max number of IO virtqueues a vhost scsi device can support. The default is 128. The max is 1024.");
struct vhost_scsi_virtqueue { struct vhost_scsi_virtqueue {
struct vhost_virtqueue vq; struct vhost_virtqueue vq;
/* /*
...@@ -186,7 +190,9 @@ struct vhost_scsi { ...@@ -186,7 +190,9 @@ struct vhost_scsi {
char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
struct vhost_dev dev; struct vhost_dev dev;
struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; struct vhost_scsi_virtqueue *vqs;
unsigned long *compl_bitmap;
struct vhost_scsi_inflight **old_inflight;
struct vhost_work vs_completion_work; /* cmd completion work item */ struct vhost_work vs_completion_work; /* cmd completion work item */
struct llist_head vs_completion_list; /* cmd completion queue */ struct llist_head vs_completion_list; /* cmd completion queue */
...@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs, ...@@ -245,7 +251,7 @@ static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
struct vhost_virtqueue *vq; struct vhost_virtqueue *vq;
int idx, i; int idx, i;
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
...@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -533,7 +539,6 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
{ {
struct vhost_scsi *vs = container_of(work, struct vhost_scsi, struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
vs_completion_work); vs_completion_work);
DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
struct virtio_scsi_cmd_resp v_rsp; struct virtio_scsi_cmd_resp v_rsp;
struct vhost_scsi_cmd *cmd, *t; struct vhost_scsi_cmd *cmd, *t;
struct llist_node *llnode; struct llist_node *llnode;
...@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -541,7 +546,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
struct iov_iter iov_iter; struct iov_iter iov_iter;
int ret, vq; int ret, vq;
bitmap_zero(signal, VHOST_SCSI_MAX_VQ); bitmap_zero(vs->compl_bitmap, vs->dev.nvqs);
llnode = llist_del_all(&vs->vs_completion_list); llnode = llist_del_all(&vs->vs_completion_list);
llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) {
se_cmd = &cmd->tvc_se_cmd; se_cmd = &cmd->tvc_se_cmd;
...@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -566,7 +571,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0); vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
vq = q - vs->vqs; vq = q - vs->vqs;
__set_bit(vq, signal); __set_bit(vq, vs->compl_bitmap);
} else } else
pr_err("Faulted on virtio_scsi_cmd_resp\n"); pr_err("Faulted on virtio_scsi_cmd_resp\n");
...@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) ...@@ -574,8 +579,8 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
} }
vq = -1; vq = -1;
while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) while ((vq = find_next_bit(vs->compl_bitmap, vs->dev.nvqs, vq + 1))
< VHOST_SCSI_MAX_VQ) < vs->dev.nvqs)
vhost_signal(&vs->dev, &vs->vqs[vq].vq); vhost_signal(&vs->dev, &vs->vqs[vq].vq);
} }
...@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) ...@@ -1419,26 +1424,25 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
/* Callers must hold dev mutex */ /* Callers must hold dev mutex */
static void vhost_scsi_flush(struct vhost_scsi *vs) static void vhost_scsi_flush(struct vhost_scsi *vs)
{ {
struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
int i; int i;
/* Init new inflight and remember the old inflight */ /* Init new inflight and remember the old inflight */
vhost_scsi_init_inflight(vs, old_inflight); vhost_scsi_init_inflight(vs, vs->old_inflight);
/* /*
* The inflight->kref was initialized to 1. We decrement it here to * The inflight->kref was initialized to 1. We decrement it here to
* indicate the start of the flush operation so that it will reach 0 * indicate the start of the flush operation so that it will reach 0
* when all the reqs are finished. * when all the reqs are finished.
*/ */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) for (i = 0; i < vs->dev.nvqs; i++)
kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight); kref_put(&vs->old_inflight[i]->kref, vhost_scsi_done_inflight);
/* Flush both the vhost poll and vhost work */ /* Flush both the vhost poll and vhost work */
vhost_dev_flush(&vs->dev); vhost_dev_flush(&vs->dev);
/* Wait for all reqs issued before the flush to be finished */ /* Wait for all reqs issued before the flush to be finished */
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) for (i = 0; i < vs->dev.nvqs; i++)
wait_for_completion(&old_inflight[i]->comp); wait_for_completion(&vs->old_inflight[i]->comp);
} }
static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq) static void vhost_scsi_destroy_vq_cmds(struct vhost_virtqueue *vq)
...@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1601,7 +1605,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
sizeof(vs->vs_vhost_wwpn)); sizeof(vs->vs_vhost_wwpn));
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { for (i = VHOST_SCSI_VQ_IO; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
if (!vhost_vq_is_setup(vq)) if (!vhost_vq_is_setup(vq))
continue; continue;
...@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ...@@ -1611,7 +1615,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
goto destroy_vq_cmds; goto destroy_vq_cmds;
} }
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, vs_tpg); vhost_vq_set_backend(vq, vs_tpg);
...@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, ...@@ -1713,7 +1717,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
target_undepend_item(&se_tpg->tpg_group.cg_item); target_undepend_item(&se_tpg->tpg_group.cg_item);
} }
if (match) { if (match) {
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vhost_vq_set_backend(vq, NULL); vhost_vq_set_backend(vq, NULL);
...@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, ...@@ -1722,7 +1726,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
/* Make sure cmds are not running before tearing them down. */ /* Make sure cmds are not running before tearing them down. */
vhost_scsi_flush(vs); vhost_scsi_flush(vs);
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
vhost_scsi_destroy_vq_cmds(vq); vhost_scsi_destroy_vq_cmds(vq);
} }
...@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) ...@@ -1762,7 +1766,7 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
return -EFAULT; return -EFAULT;
} }
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { for (i = 0; i < vs->dev.nvqs; i++) {
vq = &vs->vqs[i].vq; vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex); mutex_lock(&vq->mutex);
vq->acked_features = features; vq->acked_features = features;
...@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1776,16 +1780,40 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
{ {
struct vhost_scsi *vs; struct vhost_scsi *vs;
struct vhost_virtqueue **vqs; struct vhost_virtqueue **vqs;
int r = -ENOMEM, i; int r = -ENOMEM, i, nvqs = vhost_scsi_max_io_vqs;
vs = kvzalloc(sizeof(*vs), GFP_KERNEL); vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs) if (!vs)
goto err_vs; goto err_vs;
vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL); if (nvqs > VHOST_SCSI_MAX_IO_VQ) {
if (!vqs) pr_err("Invalid max_io_vqs of %d. Using %d.\n", nvqs,
VHOST_SCSI_MAX_IO_VQ);
nvqs = VHOST_SCSI_MAX_IO_VQ;
} else if (nvqs == 0) {
pr_err("Invalid max_io_vqs of %d. Using 1.\n", nvqs);
nvqs = 1;
}
nvqs += VHOST_SCSI_VQ_IO;
vs->compl_bitmap = bitmap_alloc(nvqs, GFP_KERNEL);
if (!vs->compl_bitmap)
goto err_compl_bitmap;
vs->old_inflight = kmalloc_array(nvqs, sizeof(*vs->old_inflight),
GFP_KERNEL | __GFP_ZERO);
if (!vs->old_inflight)
goto err_inflight;
vs->vqs = kmalloc_array(nvqs, sizeof(*vs->vqs),
GFP_KERNEL | __GFP_ZERO);
if (!vs->vqs)
goto err_vqs; goto err_vqs;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs)
goto err_local_vqs;
vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work); vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
...@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1796,11 +1824,11 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { for (i = VHOST_SCSI_VQ_IO; i < nvqs; i++) {
vqs[i] = &vs->vqs[i].vq; vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
} }
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, vhost_dev_init(&vs->dev, vqs, nvqs, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0, true, NULL); VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL); vhost_scsi_init_inflight(vs, NULL);
...@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) ...@@ -1808,7 +1836,13 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
f->private_data = vs; f->private_data = vs;
return 0; return 0;
err_local_vqs:
kfree(vs->vqs);
err_vqs: err_vqs:
kfree(vs->old_inflight);
err_inflight:
bitmap_free(vs->compl_bitmap);
err_compl_bitmap:
kvfree(vs); kvfree(vs);
err_vs: err_vs:
return r; return r;
...@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) ...@@ -1826,6 +1860,9 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
vhost_dev_stop(&vs->dev); vhost_dev_stop(&vs->dev);
vhost_dev_cleanup(&vs->dev); vhost_dev_cleanup(&vs->dev);
kfree(vs->dev.vqs); kfree(vs->dev.vqs);
kfree(vs->vqs);
kfree(vs->old_inflight);
bitmap_free(vs->compl_bitmap);
kvfree(vs); kvfree(vs);
return 0; return 0;
} }
......
...@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, ...@@ -347,6 +347,14 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
return 0; return 0;
} }
static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
return ops->suspend;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{ {
struct vdpa_device *vdpa = v->vdpa; struct vdpa_device *vdpa = v->vdpa;
...@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) ...@@ -470,6 +478,22 @@ static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
return 0; return 0;
} }
/* After a successful return of this ioctl, the device must not process more
* virtqueue descriptors. The device can answer to read or writes of config
* fields as if it were not suspended. In particular, writing to "queue_enable"
* with a value of 1 will not make the device start processing buffers.
*/
static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (!ops->suspend)
return -EOPNOTSUPP;
return ops->suspend(vdpa);
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp) void __user *argp)
{ {
...@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -577,7 +601,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (cmd == VHOST_SET_BACKEND_FEATURES) { if (cmd == VHOST_SET_BACKEND_FEATURES) {
if (copy_from_user(&features, featurep, sizeof(features))) if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT; return -EFAULT;
if (features & ~VHOST_VDPA_BACKEND_FEATURES) if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
BIT_ULL(VHOST_BACKEND_F_SUSPEND)))
return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
!vhost_vdpa_can_suspend(v))
return -EOPNOTSUPP; return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features); vhost_set_backend_features(&v->vdev, features);
return 0; return 0;
...@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -628,6 +656,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break; break;
case VHOST_GET_BACKEND_FEATURES: case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES; features = VHOST_VDPA_BACKEND_FEATURES;
if (vhost_vdpa_can_suspend(v))
features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (copy_to_user(featurep, &features, sizeof(features))) if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT; r = -EFAULT;
break; break;
...@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -640,6 +670,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_VQS_COUNT: case VHOST_VDPA_GET_VQS_COUNT:
r = vhost_vdpa_get_vqs_count(v, argp); r = vhost_vdpa_get_vqs_count(v, argp);
break; break;
case VHOST_VDPA_SUSPEND:
r = vhost_vdpa_suspend(v);
break;
default: default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp); r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD) if (r == -ENOIOCTLCMD)
...@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) ...@@ -1076,7 +1109,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
if (!bus) if (!bus)
return -EFAULT; return -EFAULT;
if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY))
return -ENOTSUPP; return -ENOTSUPP;
v->domain = iommu_domain_alloc(bus); v->domain = iommu_domain_alloc(bus);
...@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) ...@@ -1363,6 +1396,7 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
err: err:
put_device(&v->dev); put_device(&v->dev);
ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r; return r;
} }
......
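On the vhost-vdpa side the new suspend support is exposed in two ways: VHOST_BACKEND_F_SUSPEND is reported by (and accepted from) the backend-features ioctls only when the parent driver implements .suspend, and VHOST_VDPA_SUSPEND stops the datapath, failing with EOPNOTSUPP otherwise. A minimal userspace sketch against an open /dev/vhost-vdpa-N fd, using the uAPI names added by this series (declared via <linux/vhost.h> and <linux/vhost_types.h>):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Sketch: suspend the device if the backend advertises the capability.
 * Returns 0 on success, -1 otherwise.
 */
static int vhost_vdpa_try_suspend(int fd)
{
	uint64_t features;

	if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
		return -1;
	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
		return -1;			/* parent has no .suspend op */

	/* Ack the backend features we intend to rely on. */
	if (ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features))
		return -1;

	/* After this succeeds the device processes no more descriptors,
	 * but config space accesses still behave as before. */
	return ioctl(fd, VHOST_VDPA_SUSPEND);
}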
...@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern); ...@@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(vringh_need_notify_kern);
#if IS_REACHABLE(CONFIG_VHOST_IOTLB) #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
static int iotlb_translate(const struct vringh *vrh, static int iotlb_translate(const struct vringh *vrh,
u64 addr, u64 len, struct bio_vec iov[], u64 addr, u64 len, u64 *translated,
struct bio_vec iov[],
int iov_size, u32 perm) int iov_size, u32 perm)
{ {
struct vhost_iotlb_map *map; struct vhost_iotlb_map *map;
...@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh, ...@@ -1136,43 +1137,76 @@ static int iotlb_translate(const struct vringh *vrh,
spin_unlock(vrh->iotlb_lock); spin_unlock(vrh->iotlb_lock);
if (translated)
*translated = min(len, s);
return ret; return ret;
} }
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len) void *src, size_t len)
{ {
struct iov_iter iter; u64 total_translated = 0;
struct bio_vec iov[16];
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)src, while (total_translated < len) {
len, iov, 16, VHOST_MAP_RO); struct bio_vec iov[16];
if (ret < 0) struct iov_iter iter;
return ret; u64 translated;
int ret;
iov_iter_bvec(&iter, READ, iov, ret, len); ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_RO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
ret = copy_from_iter(dst, len, &iter); iov_iter_bvec(&iter, READ, iov, ret, translated);
return ret; ret = copy_from_iter(dst, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return total_translated;
} }
static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
void *src, size_t len) void *src, size_t len)
{ {
struct iov_iter iter; u64 total_translated = 0;
struct bio_vec iov[16];
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, while (total_translated < len) {
len, iov, 16, VHOST_MAP_WO); struct bio_vec iov[16];
if (ret < 0) struct iov_iter iter;
return ret; u64 translated;
int ret;
ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
len - total_translated, &translated,
iov, ARRAY_SIZE(iov), VHOST_MAP_WO);
if (ret == -ENOBUFS)
ret = ARRAY_SIZE(iov);
else if (ret < 0)
return ret;
iov_iter_bvec(&iter, WRITE, iov, ret, len); iov_iter_bvec(&iter, WRITE, iov, ret, translated);
ret = copy_to_iter(src, translated, &iter);
if (ret < 0)
return ret;
src += translated;
dst += translated;
total_translated += translated;
}
return copy_to_iter(src, len, &iter); return total_translated;
} }
static inline int getu16_iotlb(const struct vringh *vrh, static inline int getu16_iotlb(const struct vringh *vrh,
...@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh, ...@@ -1183,7 +1217,7 @@ static inline int getu16_iotlb(const struct vringh *vrh,
int ret; int ret;
/* Atomic read is needed for getu16 */ /* Atomic read is needed for getu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_RO); &iov, 1, VHOST_MAP_RO);
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh, ...@@ -1204,7 +1238,7 @@ static inline int putu16_iotlb(const struct vringh *vrh,
int ret; int ret;
/* Atomic write is needed for putu16 */ /* Atomic write is needed for putu16 */
ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), NULL,
&iov, 1, VHOST_MAP_WO); &iov, 1, VHOST_MAP_WO);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
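The copy_from_iotlb()/copy_to_iotlb() rewrite above addresses a hard limit: a single iotlb_translate() call can fill at most 16 bio_vecs, so buffers whose translation needs more mappings could not be copied in one pass. The new code loops, treating -ENOBUFS as "translation window full" and using the new translated out-parameter to advance. A standalone sketch of the same chunking invariant, with a byte window standing in for the 16-segment limit (no vringh involved):

#include <stddef.h>
#include <string.h>

#define MY_WINDOW 4096	/* stand-in for the 16-bio_vec translation window */

/* Sketch: cover 'len' bytes when each pass is bounded by MY_WINDOW,
 * advancing by however much the pass actually handled.
 */
static void my_copy_chunked(char *dst, const char *src, size_t len)
{
	size_t total = 0;

	while (total < len) {
		size_t translated = len - total;

		if (translated > MY_WINDOW)
			translated = MY_WINDOW;	/* like -ENOBUFS: window exhausted */

		memcpy(dst + total, src + total, translated);
		total += translated;
	}
}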
...@@ -35,11 +35,12 @@ if VIRTIO_MENU ...@@ -35,11 +35,12 @@ if VIRTIO_MENU
config VIRTIO_HARDEN_NOTIFICATION config VIRTIO_HARDEN_NOTIFICATION
bool "Harden virtio notification" bool "Harden virtio notification"
depends on BROKEN
help help
Enable this to harden the device notifications and suppress Enable this to harden the device notifications and suppress
those that happen at a time where notifications are illegal. those that happen at a time where notifications are illegal.
Experimental: Note that several drivers still have bugs that Experimental: Note that several drivers still have issues that
may cause crashes or hangs when correct handling of may cause crashes or hangs when correct handling of
notifications is enforced; depending on the subset of notifications is enforced; depending on the subset of
drivers and devices you use, this may or may not work. drivers and devices you use, this may or may not work.
...@@ -126,9 +127,11 @@ config VIRTIO_MEM ...@@ -126,9 +127,11 @@ config VIRTIO_MEM
This driver provides access to virtio-mem paravirtualized memory This driver provides access to virtio-mem paravirtualized memory
devices, allowing to hotplug and hotunplug memory. devices, allowing to hotplug and hotunplug memory.
This driver was only tested under x86-64 and arm64, but should This driver currently only supports x86-64 and arm64. Although it
theoretically work on all architectures that support memory hotplug should compile on other architectures that implement memory
and hotremove. hot(un)plug, architecture-specific and/or common
code changes may be required for virtio-mem, kdump and kexec to work as
expected.
If unsure, say M. If unsure, say M.
......
...@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev) ...@@ -428,7 +428,9 @@ int register_virtio_device(struct virtio_device *dev)
goto out; goto out;
dev->index = err; dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index); err = dev_set_name(&dev->dev, "virtio%u", dev->index);
if (err)
goto out_ida_remove;
err = virtio_device_of_init(dev); err = virtio_device_of_init(dev);
if (err) if (err)
......
...@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev) ...@@ -360,7 +360,7 @@ static void vm_synchronize_cbs(struct virtio_device *vdev)
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index, static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, bool ctx) const char *name, u32 size, bool ctx)
{ {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info; struct virtio_mmio_vq_info *info;
...@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in ...@@ -395,14 +395,19 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned int in
goto error_new_virtqueue; goto error_new_virtqueue;
} }
if (!size || size > num)
size = num;
/* Create the vring */ /* Create the vring */
vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, true, ctx, vm_notify, callback, name); true, true, ctx, vm_notify, callback, name);
if (!vq) { if (!vq) {
err = -ENOMEM; err = -ENOMEM;
goto error_new_virtqueue; goto error_new_virtqueue;
} }
vq->num_max = num;
/* Activate the queue */ /* Activate the queue */
writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
if (vm_dev->version == 1) { if (vm_dev->version == 1) {
...@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -472,6 +477,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -487,6 +493,9 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
if (err) if (err)
return err; return err;
if (of_property_read_bool(vm_dev->pdev->dev.of_node, "wakeup-source"))
enable_irq_wake(irq);
for (i = 0; i < nvqs; ++i) { for (i = 0; i < nvqs; ++i) {
if (!names[i]) { if (!names[i]) {
vqs[i] = NULL; vqs[i] = NULL;
...@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -494,6 +503,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
} }
vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false); ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev); vm_del_vqs(vdev);
......
...@@ -174,6 +174,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, ...@@ -174,6 +174,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in ...@@ -186,7 +187,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in
if (!info) if (!info)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
msix_vec); msix_vec);
if (IS_ERR(vq)) if (IS_ERR(vq))
goto out_info; goto out_info;
...@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq) ...@@ -214,9 +215,15 @@ static void vp_del_vq(struct virtqueue *vq)
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&vp_dev->lock, flags); /*
list_del(&info->node); * If the vq is in the reset state (e.g. re-enable after reset failed),
spin_unlock_irqrestore(&vp_dev->lock, flags); * info->node was already removed from the list; skip list_del() here to prevent unexpected irqs.
*/
if (!vq->reset) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
}
vp_dev->del_vq(info); vp_dev->del_vq(info);
kfree(info); kfree(info);
...@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev) ...@@ -277,7 +284,7 @@ void vp_del_vqs(struct virtio_device *vdev)
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], bool per_vq_vectors, const char * const names[], u32 sizes[], bool per_vq_vectors,
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, ...@@ -320,8 +327,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
else else
msix_vec = VP_MSIX_VQ_VECTOR; msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
ctx ? ctx[i] : false, sizes ? sizes[i] : 0,
msix_vec); ctx ? ctx[i] : false, msix_vec);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto error_find; goto error_find;
...@@ -351,7 +358,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, ...@@ -351,7 +358,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs,
static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx) const char * const names[], u32 sizes[], const bool *ctx)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err, queue_idx = 0; int i, err, queue_idx = 0;
...@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, ...@@ -373,6 +380,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
continue; continue;
} }
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
sizes ? sizes[i] : 0,
ctx ? ctx[i] : false, ctx ? ctx[i] : false,
VIRTIO_MSI_NO_VECTOR); VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
...@@ -390,21 +398,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, ...@@ -390,21 +398,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs,
/* the config->find_vqs() implementation */ /* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
int err; int err;
/* Try MSI-X with one vector per queue. */ /* Try MSI-X with one vector per queue. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, desc); err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, ctx, desc);
if (!err) if (!err)
return 0; return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */ /* Fallback: MSI-X with one vector for config, one shared for queues. */
err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, desc); err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, ctx, desc);
if (!err) if (!err)
return 0; return 0;
/* Finally fall back to regular interrupts. */ /* Finally fall back to regular interrupts. */
return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx); return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
} }
const char *vp_bus_name(struct virtio_device *vdev) const char *vp_bus_name(struct virtio_device *vdev)
......
...@@ -80,6 +80,7 @@ struct virtio_pci_device { ...@@ -80,6 +80,7 @@ struct virtio_pci_device {
unsigned int idx, unsigned int idx,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec); u16 msix_vec);
void (*del_vq)(struct virtio_pci_vq_info *info); void (*del_vq)(struct virtio_pci_vq_info *info);
...@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev); ...@@ -110,7 +111,7 @@ void vp_del_vqs(struct virtio_device *vdev);
/* the config->find_vqs() implementation */ /* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *desc); struct irq_affinity *desc);
const char *vp_bus_name(struct virtio_device *vdev); const char *vp_bus_name(struct virtio_device *vdev);
......
...@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -112,6 +112,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -125,16 +126,21 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
if (!size || size > num)
size = num;
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, size,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, false, ctx, true, false, ctx,
vp_notify, callback, name); vp_notify, callback, name);
if (!vq) if (!vq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
vq->num_max = num;
q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
if (q_pfn >> 32) { if (q_pfn >> 32) {
dev_err(&vp_dev->pci_dev->dev, dev_err(&vp_dev->pci_dev->dev,
......
...@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) ...@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
__virtio_set_bit(vdev, VIRTIO_F_SR_IOV); __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
if (features & BIT_ULL(VIRTIO_F_RING_RESET))
__virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
} }
/* virtio config->finalize_features() implementation */ /* virtio config->finalize_features() implementation */
...@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev) ...@@ -176,6 +179,110 @@ static void vp_reset(struct virtio_device *vdev)
vp_synchronize_vectors(vdev); vp_synchronize_vectors(vdev);
} }
static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
unsigned long index;
index = vq->index;
/* activate the queue */
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR)
return -EBUSY;
}
return 0;
}
static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags;
if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
return -ENOENT;
vp_modern_set_queue_reset(mdev, vq->index);
info = vp_dev->vqs[vq->index];
/* delete vq from irq handler */
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
INIT_LIST_HEAD(&info->node);
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_break(vq);
#endif
/* For the case where vq has an exclusive irq, call synchronize_irq() to
* wait for completion.
*
* note: We can't use disable_irq() since it conflicts with the affinity
* managed IRQ that is used by some drivers.
*/
if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
vq->reset = true;
return 0;
}
static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtio_pci_vq_info *info;
unsigned long flags, index;
int err;
if (!vq->reset)
return -EBUSY;
index = vq->index;
info = vp_dev->vqs[index];
if (vp_modern_get_queue_reset(mdev, index))
return -EBUSY;
if (vp_modern_get_queue_enable(mdev, index))
return -EBUSY;
err = vp_active_vq(vq, info->msix_vector);
if (err)
return err;
if (vq->callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
INIT_LIST_HEAD(&info->node);
}
#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
__virtqueue_unbreak(vq);
#endif
vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
vq->reset = false;
return 0;
}
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{ {
return vp_modern_config_vector(&vp_dev->mdev, vector); return vp_modern_config_vector(&vp_dev->mdev, vector);
...@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -186,6 +293,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
unsigned int index, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, const char *name,
u32 size,
bool ctx, bool ctx,
u16 msix_vec) u16 msix_vec)
{ {
...@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -203,47 +311,39 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
if (!num || vp_modern_get_queue_enable(mdev, index)) if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
if (num & (num - 1)) { if (!size || size > num)
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num); size = num;
if (size & (size - 1)) {
dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
info->msix_vector = msix_vec; info->msix_vector = msix_vec;
/* create the vring */ /* create the vring */
vq = vring_create_virtqueue(index, num, vq = vring_create_virtqueue(index, size,
SMP_CACHE_BYTES, &vp_dev->vdev, SMP_CACHE_BYTES, &vp_dev->vdev,
true, true, ctx, true, true, ctx,
vp_notify, callback, name); vp_notify, callback, name);
if (!vq) if (!vq)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
/* activate the queue */ vq->num_max = num;
vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq), err = vp_active_vq(vq, msix_vec);
virtqueue_get_avail_addr(vq), if (err)
virtqueue_get_used_addr(vq)); goto err;
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL); vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) { if (!vq->priv) {
err = -ENOMEM; err = -ENOMEM;
goto err_map_notify; goto err;
}
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto err_assign_vector;
}
} }
return vq; return vq;
err_assign_vector: err:
if (!mdev->notify_base)
pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
vring_del_virtqueue(vq); vring_del_virtqueue(vq);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -251,12 +351,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ...@@ -251,12 +351,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq; struct virtqueue *vq;
int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc); int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx,
desc);
if (rc) if (rc)
return rc; return rc;
...@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { ...@@ -401,6 +504,8 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.set_vq_affinity = vp_set_vq_affinity, .set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity, .get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region, .get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
}; };
static const struct virtio_config_ops virtio_pci_config_ops = { static const struct virtio_config_ops virtio_pci_config_ops = {
...@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = { ...@@ -419,6 +524,8 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.set_vq_affinity = vp_set_vq_affinity, .set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity, .get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region, .get_shm_region = vp_get_shm_region,
.disable_vq_and_reset = vp_modern_disable_vq_and_reset,
.enable_vq_after_reset = vp_modern_enable_vq_after_reset,
}; };
/* the PCI probing function */ /* the PCI probing function */
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/virtio_pci_modern.h> #include <linux/virtio_pci_modern.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/delay.h>
/* /*
* vp_modern_map_capability - map a part of virtio pci capability * vp_modern_map_capability - map a part of virtio pci capability
...@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev, ...@@ -474,6 +475,44 @@ void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
} }
EXPORT_SYMBOL_GPL(vp_modern_set_status); EXPORT_SYMBOL_GPL(vp_modern_set_status);
/*
* vp_modern_get_queue_reset - get the queue reset status
* @mdev: the modern virtio-pci device
* @index: queue index
*/
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
return vp_ioread16(&cfg->queue_reset);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_reset);
/*
* vp_modern_set_queue_reset - reset the queue
* @mdev: the modern virtio-pci device
* @index: queue index
*/
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index)
{
struct virtio_pci_modern_common_cfg __iomem *cfg;
cfg = (struct virtio_pci_modern_common_cfg __iomem *)mdev->common;
vp_iowrite16(index, &cfg->cfg.queue_select);
vp_iowrite16(1, &cfg->queue_reset);
while (vp_ioread16(&cfg->queue_reset))
msleep(1);
while (vp_ioread16(&cfg->cfg.queue_enable))
msleep(1);
}
EXPORT_SYMBOL_GPL(vp_modern_set_queue_reset);
/* /*
* vp_modern_queue_vector - set the MSIX vector for a specific virtqueue * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
* @mdev: the modern virtio-pci device * @mdev: the modern virtio-pci device
......
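The two queue_reset helpers above cast mdev->common to an extended common configuration that appends the VIRTIO 1.2 queue reset register after the standard layout. A minimal sketch of the layout they assume (field names taken from the accessors above; queue_notify_data is assumed per the spec ordering, and the canonical definition lives in <linux/virtio_pci_modern.h>):

	#include <linux/types.h>
	#include <linux/virtio_pci.h>

	/* Sketch only: standard common cfg followed by the 1.2 extensions. */
	struct virtio_pci_modern_common_cfg_sketch {
		struct virtio_pci_common_cfg cfg;

		__le16 queue_notify_data;	/* read-write */
		__le16 queue_reset;		/* read-write, non-zero while reset is in progress */
	};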
...@@ -85,6 +85,71 @@ struct vring_desc_extra { ...@@ -85,6 +85,71 @@ struct vring_desc_extra {
u16 next; /* The next desc state in a list. */ u16 next; /* The next desc state in a list. */
}; };
struct vring_virtqueue_split {
/* Actual memory layout for this queue. */
struct vring vring;
/* Last written value to avail->flags */
u16 avail_flags_shadow;
/*
* Last written value to avail->idx in
* guest byte order.
*/
u16 avail_idx_shadow;
/* Per-descriptor state. */
struct vring_desc_state_split *desc_state;
struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t queue_dma_addr;
size_t queue_size_in_bytes;
/*
	 * The original vring-creation parameters, kept so that a new vring
	 * can be created later (e.g. when the queue is resized).
*/
u32 vring_align;
bool may_reduce_num;
};
struct vring_virtqueue_packed {
/* Actual memory layout for this queue. */
struct {
unsigned int num;
struct vring_packed_desc *desc;
struct vring_packed_desc_event *driver;
struct vring_packed_desc_event *device;
} vring;
/* Driver ring wrap counter. */
bool avail_wrap_counter;
/* Avail used flags. */
u16 avail_used_flags;
/* Index of the next avail descriptor. */
u16 next_avail_idx;
/*
* Last written value to driver->flags in
* guest byte order.
*/
u16 event_flags_shadow;
/* Per-descriptor state. */
struct vring_desc_state_packed *desc_state;
struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t ring_dma_addr;
dma_addr_t driver_event_dma_addr;
dma_addr_t device_event_dma_addr;
size_t ring_size_in_bytes;
size_t event_size_in_bytes;
};
struct vring_virtqueue { struct vring_virtqueue {
struct virtqueue vq; struct virtqueue vq;
...@@ -124,64 +189,10 @@ struct vring_virtqueue { ...@@ -124,64 +189,10 @@ struct vring_virtqueue {
union { union {
/* Available for split ring */ /* Available for split ring */
struct { struct vring_virtqueue_split split;
/* Actual memory layout for this queue. */
struct vring vring;
/* Last written value to avail->flags */
u16 avail_flags_shadow;
/*
* Last written value to avail->idx in
* guest byte order.
*/
u16 avail_idx_shadow;
/* Per-descriptor state. */
struct vring_desc_state_split *desc_state;
struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t queue_dma_addr;
size_t queue_size_in_bytes;
} split;
/* Available for packed ring */ /* Available for packed ring */
struct { struct vring_virtqueue_packed packed;
/* Actual memory layout for this queue. */
struct {
unsigned int num;
struct vring_packed_desc *desc;
struct vring_packed_desc_event *driver;
struct vring_packed_desc_event *device;
} vring;
/* Driver ring wrap counter. */
bool avail_wrap_counter;
/* Avail used flags. */
u16 avail_used_flags;
/* Index of the next avail descriptor. */
u16 next_avail_idx;
/*
* Last written value to driver->flags in
* guest byte order.
*/
u16 event_flags_shadow;
/* Per-descriptor state. */
struct vring_desc_state_packed *desc_state;
struct vring_desc_extra *desc_extra;
/* DMA address and size information */
dma_addr_t ring_dma_addr;
dma_addr_t driver_event_dma_addr;
dma_addr_t device_event_dma_addr;
size_t ring_size_in_bytes;
size_t event_size_in_bytes;
} packed;
}; };
/* How to notify other side. FIXME: commonalize hcalls! */ /* How to notify other side. FIXME: commonalize hcalls! */
...@@ -200,6 +211,16 @@ struct vring_virtqueue { ...@@ -200,6 +211,16 @@ struct vring_virtqueue {
#endif #endif
}; };
static struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring_virtqueue_split *vring_split,
struct virtio_device *vdev,
bool weak_barriers,
bool context,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
static void vring_free(struct virtqueue *_vq);
/* /*
* Helpers. * Helpers.
...@@ -364,6 +385,24 @@ static int vring_mapping_error(const struct vring_virtqueue *vq, ...@@ -364,6 +385,24 @@ static int vring_mapping_error(const struct vring_virtqueue *vq,
return dma_mapping_error(vring_dma_dev(vq), addr); return dma_mapping_error(vring_dma_dev(vq), addr);
} }
static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
{
vq->vq.num_free = num;
if (vq->packed_ring)
vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
else
vq->last_used_idx = 0;
vq->event_triggered = false;
vq->num_added = 0;
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
#endif
}
/* /*
* Split ring specific functions - *_split(). * Split ring specific functions - *_split().
...@@ -907,28 +946,107 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) ...@@ -907,28 +946,107 @@ static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
return NULL; return NULL;
} }
static struct virtqueue *vring_create_virtqueue_split( static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split,
unsigned int index, struct vring_virtqueue *vq)
unsigned int num, {
unsigned int vring_align, struct virtio_device *vdev;
struct virtio_device *vdev,
bool weak_barriers, vdev = vq->vq.vdev;
bool may_reduce_num,
bool context, vring_split->avail_flags_shadow = 0;
bool (*notify)(struct virtqueue *), vring_split->avail_idx_shadow = 0;
void (*callback)(struct virtqueue *),
const char *name) /* No callback? Tell other side not to bother us. */
if (!vq->vq.callback) {
vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
if (!vq->event)
vring_split->vring.avail->flags = cpu_to_virtio16(vdev,
vring_split->avail_flags_shadow);
}
}
static void virtqueue_reinit_split(struct vring_virtqueue *vq)
{
int num;
num = vq->split.vring.num;
vq->split.vring.avail->flags = 0;
vq->split.vring.avail->idx = 0;
/* reset avail event */
vq->split.vring.avail->ring[num] = 0;
vq->split.vring.used->flags = 0;
vq->split.vring.used->idx = 0;
/* reset used event */
*(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0;
virtqueue_init(vq, num);
virtqueue_vring_init_split(&vq->split, vq);
}
static void virtqueue_vring_attach_split(struct vring_virtqueue *vq,
struct vring_virtqueue_split *vring_split)
{
vq->split = *vring_split;
/* Put everything in free lists. */
vq->free_head = 0;
}
static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split)
{
struct vring_desc_state_split *state;
struct vring_desc_extra *extra;
u32 num = vring_split->vring.num;
state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL);
if (!state)
goto err_state;
extra = vring_alloc_desc_extra(num);
if (!extra)
goto err_extra;
memset(state, 0, num * sizeof(struct vring_desc_state_split));
vring_split->desc_state = state;
vring_split->desc_extra = extra;
return 0;
err_extra:
kfree(state);
err_state:
return -ENOMEM;
}
static void vring_free_split(struct vring_virtqueue_split *vring_split,
struct virtio_device *vdev)
{
vring_free_queue(vdev, vring_split->queue_size_in_bytes,
vring_split->vring.desc,
vring_split->queue_dma_addr);
kfree(vring_split->desc_state);
kfree(vring_split->desc_extra);
}
static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
struct virtio_device *vdev,
u32 num,
unsigned int vring_align,
bool may_reduce_num)
{ {
struct virtqueue *vq;
void *queue = NULL; void *queue = NULL;
dma_addr_t dma_addr; dma_addr_t dma_addr;
size_t queue_size_in_bytes;
struct vring vring;
/* We assume num is a power of 2. */ /* We assume num is a power of 2. */
if (num & (num - 1)) { if (num & (num - 1)) {
dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
return NULL; return -EINVAL;
} }
/* TODO: allocate each queue chunk individually */ /* TODO: allocate each queue chunk individually */
...@@ -939,11 +1057,11 @@ static struct virtqueue *vring_create_virtqueue_split( ...@@ -939,11 +1057,11 @@ static struct virtqueue *vring_create_virtqueue_split(
if (queue) if (queue)
break; break;
if (!may_reduce_num) if (!may_reduce_num)
return NULL; return -ENOMEM;
} }
if (!num) if (!num)
return NULL; return -ENOMEM;
if (!queue) { if (!queue) {
/* Try to get a single page. You are my only hope! */ /* Try to get a single page. You are my only hope! */
...@@ -951,26 +1069,85 @@ static struct virtqueue *vring_create_virtqueue_split( ...@@ -951,26 +1069,85 @@ static struct virtqueue *vring_create_virtqueue_split(
&dma_addr, GFP_KERNEL|__GFP_ZERO); &dma_addr, GFP_KERNEL|__GFP_ZERO);
} }
if (!queue) if (!queue)
return NULL; return -ENOMEM;
vring_init(&vring_split->vring, num, queue, vring_align);
vring_split->queue_dma_addr = dma_addr;
vring_split->queue_size_in_bytes = vring_size(num, vring_align);
queue_size_in_bytes = vring_size(num, vring_align); vring_split->vring_align = vring_align;
vring_init(&vring, num, queue, vring_align); vring_split->may_reduce_num = may_reduce_num;
vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, return 0;
notify, callback, name); }
static struct virtqueue *vring_create_virtqueue_split(
unsigned int index,
unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
bool may_reduce_num,
bool context,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name)
{
struct vring_virtqueue_split vring_split = {};
struct virtqueue *vq;
int err;
err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align,
may_reduce_num);
if (err)
return NULL;
vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
context, notify, callback, name);
if (!vq) { if (!vq) {
vring_free_queue(vdev, queue_size_in_bytes, queue, vring_free_split(&vring_split, vdev);
dma_addr);
return NULL; return NULL;
} }
to_vvq(vq)->split.queue_dma_addr = dma_addr;
to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
to_vvq(vq)->we_own_ring = true; to_vvq(vq)->we_own_ring = true;
return vq; return vq;
} }
static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
{
struct vring_virtqueue_split vring_split = {};
struct vring_virtqueue *vq = to_vvq(_vq);
struct virtio_device *vdev = _vq->vdev;
int err;
err = vring_alloc_queue_split(&vring_split, vdev, num,
vq->split.vring_align,
vq->split.may_reduce_num);
if (err)
goto err;
err = vring_alloc_state_extra_split(&vring_split);
if (err)
goto err_state_extra;
vring_free(&vq->vq);
virtqueue_vring_init_split(&vring_split, vq);
virtqueue_init(vq, vring_split.vring.num);
virtqueue_vring_attach_split(vq, &vring_split);
return 0;
err_state_extra:
vring_free_split(&vring_split, vdev);
err:
virtqueue_reinit_split(vq);
return -ENOMEM;
}
/* /*
* Packed ring specific functions - *_packed(). * Packed ring specific functions - *_packed().
...@@ -1637,8 +1814,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) ...@@ -1637,8 +1814,7 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
return NULL; return NULL;
} }
static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num)
unsigned int num)
{ {
struct vring_desc_extra *desc_extra; struct vring_desc_extra *desc_extra;
unsigned int i; unsigned int i;
...@@ -1656,19 +1832,32 @@ static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *v ...@@ -1656,19 +1832,32 @@ static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *v
return desc_extra; return desc_extra;
} }
static struct virtqueue *vring_create_virtqueue_packed( static void vring_free_packed(struct vring_virtqueue_packed *vring_packed,
unsigned int index, struct virtio_device *vdev)
unsigned int num, {
unsigned int vring_align, if (vring_packed->vring.desc)
struct virtio_device *vdev, vring_free_queue(vdev, vring_packed->ring_size_in_bytes,
bool weak_barriers, vring_packed->vring.desc,
bool may_reduce_num, vring_packed->ring_dma_addr);
bool context,
bool (*notify)(struct virtqueue *), if (vring_packed->vring.driver)
void (*callback)(struct virtqueue *), vring_free_queue(vdev, vring_packed->event_size_in_bytes,
const char *name) vring_packed->vring.driver,
vring_packed->driver_event_dma_addr);
if (vring_packed->vring.device)
vring_free_queue(vdev, vring_packed->event_size_in_bytes,
vring_packed->vring.device,
vring_packed->device_event_dma_addr);
kfree(vring_packed->desc_state);
kfree(vring_packed->desc_extra);
}
static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed,
struct virtio_device *vdev,
u32 num)
{ {
struct vring_virtqueue *vq;
struct vring_packed_desc *ring; struct vring_packed_desc *ring;
struct vring_packed_desc_event *driver, *device; struct vring_packed_desc_event *driver, *device;
dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
...@@ -1680,7 +1869,11 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1680,7 +1869,11 @@ static struct virtqueue *vring_create_virtqueue_packed(
&ring_dma_addr, &ring_dma_addr,
GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
if (!ring) if (!ring)
goto err_ring; goto err;
vring_packed->vring.desc = ring;
vring_packed->ring_dma_addr = ring_dma_addr;
vring_packed->ring_size_in_bytes = ring_size_in_bytes;
event_size_in_bytes = sizeof(struct vring_packed_desc_event); event_size_in_bytes = sizeof(struct vring_packed_desc_event);
...@@ -1688,13 +1881,112 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1688,13 +1881,112 @@ static struct virtqueue *vring_create_virtqueue_packed(
&driver_event_dma_addr, &driver_event_dma_addr,
GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
if (!driver) if (!driver)
goto err_driver; goto err;
vring_packed->vring.driver = driver;
vring_packed->event_size_in_bytes = event_size_in_bytes;
vring_packed->driver_event_dma_addr = driver_event_dma_addr;
device = vring_alloc_queue(vdev, event_size_in_bytes, device = vring_alloc_queue(vdev, event_size_in_bytes,
&device_event_dma_addr, &device_event_dma_addr,
GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
if (!device) if (!device)
goto err_device; goto err;
vring_packed->vring.device = device;
vring_packed->device_event_dma_addr = device_event_dma_addr;
vring_packed->vring.num = num;
return 0;
err:
vring_free_packed(vring_packed, vdev);
return -ENOMEM;
}
static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed)
{
struct vring_desc_state_packed *state;
struct vring_desc_extra *extra;
u32 num = vring_packed->vring.num;
state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL);
if (!state)
goto err_desc_state;
memset(state, 0, num * sizeof(struct vring_desc_state_packed));
extra = vring_alloc_desc_extra(num);
if (!extra)
goto err_desc_extra;
vring_packed->desc_state = state;
vring_packed->desc_extra = extra;
return 0;
err_desc_extra:
kfree(state);
err_desc_state:
return -ENOMEM;
}
static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed,
bool callback)
{
vring_packed->next_avail_idx = 0;
vring_packed->avail_wrap_counter = 1;
vring_packed->event_flags_shadow = 0;
vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
/* No callback? Tell other side not to bother us. */
if (!callback) {
vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
vring_packed->vring.driver->flags =
cpu_to_le16(vring_packed->event_flags_shadow);
}
}
static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq,
struct vring_virtqueue_packed *vring_packed)
{
vq->packed = *vring_packed;
/* Put everything in free lists. */
vq->free_head = 0;
}
static void virtqueue_reinit_packed(struct vring_virtqueue *vq)
{
memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes);
memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes);
/* we need to reset the desc.flags. For more, see is_used_desc_packed() */
memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes);
virtqueue_init(vq, vq->packed.vring.num);
virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
}
static struct virtqueue *vring_create_virtqueue_packed(
unsigned int index,
unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
bool weak_barriers,
bool may_reduce_num,
bool context,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name)
{
struct vring_virtqueue_packed vring_packed = {};
struct vring_virtqueue *vq;
int err;
if (vring_alloc_queue_packed(&vring_packed, vdev, num))
goto err_ring;
vq = kmalloc(sizeof(*vq), GFP_KERNEL); vq = kmalloc(sizeof(*vq), GFP_KERNEL);
if (!vq) if (!vq)
...@@ -1703,8 +1995,8 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1703,8 +1995,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
vq->vq.callback = callback; vq->vq.callback = callback;
vq->vq.vdev = vdev; vq->vq.vdev = vdev;
vq->vq.name = name; vq->vq.name = name;
vq->vq.num_free = num;
vq->vq.index = index; vq->vq.index = index;
vq->vq.reset = false;
vq->we_own_ring = true; vq->we_own_ring = true;
vq->notify = notify; vq->notify = notify;
vq->weak_barriers = weak_barriers; vq->weak_barriers = weak_barriers;
...@@ -1713,15 +2005,8 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1713,15 +2005,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
#else #else
vq->broken = false; vq->broken = false;
#endif #endif
vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
vq->event_triggered = false;
vq->num_added = 0;
vq->packed_ring = true; vq->packed_ring = true;
vq->use_dma_api = vring_use_dma_api(vdev); vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
#endif
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context; !context;
...@@ -1730,65 +2015,58 @@ static struct virtqueue *vring_create_virtqueue_packed( ...@@ -1730,65 +2015,58 @@ static struct virtqueue *vring_create_virtqueue_packed(
if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
vq->weak_barriers = false; vq->weak_barriers = false;
vq->packed.ring_dma_addr = ring_dma_addr; err = vring_alloc_state_extra_packed(&vring_packed);
vq->packed.driver_event_dma_addr = driver_event_dma_addr; if (err)
vq->packed.device_event_dma_addr = device_event_dma_addr; goto err_state_extra;
vq->packed.ring_size_in_bytes = ring_size_in_bytes;
vq->packed.event_size_in_bytes = event_size_in_bytes;
vq->packed.vring.num = num;
vq->packed.vring.desc = ring;
vq->packed.vring.driver = driver;
vq->packed.vring.device = device;
vq->packed.next_avail_idx = 0;
vq->packed.avail_wrap_counter = 1;
vq->packed.event_flags_shadow = 0;
vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
vq->packed.desc_state = kmalloc_array(num,
sizeof(struct vring_desc_state_packed),
GFP_KERNEL);
if (!vq->packed.desc_state)
goto err_desc_state;
memset(vq->packed.desc_state, 0,
num * sizeof(struct vring_desc_state_packed));
/* Put everything in free lists. */
vq->free_head = 0;
vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); virtqueue_vring_init_packed(&vring_packed, !!callback);
if (!vq->packed.desc_extra)
goto err_desc_extra;
/* No callback? Tell other side not to bother us. */ virtqueue_init(vq, num);
if (!callback) { virtqueue_vring_attach_packed(vq, &vring_packed);
vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
vq->packed.vring.driver->flags =
cpu_to_le16(vq->packed.event_flags_shadow);
}
spin_lock(&vdev->vqs_list_lock); spin_lock(&vdev->vqs_list_lock);
list_add_tail(&vq->vq.list, &vdev->vqs); list_add_tail(&vq->vq.list, &vdev->vqs);
spin_unlock(&vdev->vqs_list_lock); spin_unlock(&vdev->vqs_list_lock);
return &vq->vq; return &vq->vq;
err_desc_extra: err_state_extra:
kfree(vq->packed.desc_state);
err_desc_state:
kfree(vq); kfree(vq);
err_vq: err_vq:
vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); vring_free_packed(&vring_packed, vdev);
err_device:
vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
err_driver:
vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
err_ring: err_ring:
return NULL; return NULL;
} }
static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num)
{
struct vring_virtqueue_packed vring_packed = {};
struct vring_virtqueue *vq = to_vvq(_vq);
struct virtio_device *vdev = _vq->vdev;
int err;
if (vring_alloc_queue_packed(&vring_packed, vdev, num))
goto err_ring;
err = vring_alloc_state_extra_packed(&vring_packed);
if (err)
goto err_state_extra;
vring_free(&vq->vq);
virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback);
virtqueue_init(vq, vring_packed.vring.num);
virtqueue_vring_attach_packed(vq, &vring_packed);
return 0;
err_state_extra:
vring_free_packed(&vring_packed, vdev);
err_ring:
virtqueue_reinit_packed(vq);
return -ENOMEM;
}
/* /*
* Generic functions and exported symbols. * Generic functions and exported symbols.
...@@ -2131,8 +2409,8 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); ...@@ -2131,8 +2409,8 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
* @_vq: the struct virtqueue we're talking about. * @_vq: the struct virtqueue we're talking about.
* *
* Returns NULL or the "data" token handed to virtqueue_add_*(). * Returns NULL or the "data" token handed to virtqueue_add_*().
* This is not valid on an active queue; it is useful only for device * This is not valid on an active queue; it is useful for device
 * shutdown. * shutdown or a queue that has been reset.
*/ */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq) void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{ {
...@@ -2180,16 +2458,17 @@ irqreturn_t vring_interrupt(int irq, void *_vq) ...@@ -2180,16 +2458,17 @@ irqreturn_t vring_interrupt(int irq, void *_vq)
EXPORT_SYMBOL_GPL(vring_interrupt); EXPORT_SYMBOL_GPL(vring_interrupt);
/* Only available for split ring */ /* Only available for split ring */
struct virtqueue *__vring_new_virtqueue(unsigned int index, static struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring vring, struct vring_virtqueue_split *vring_split,
struct virtio_device *vdev, struct virtio_device *vdev,
bool weak_barriers, bool weak_barriers,
bool context, bool context,
bool (*notify)(struct virtqueue *), bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *), void (*callback)(struct virtqueue *),
const char *name) const char *name)
{ {
struct vring_virtqueue *vq; struct vring_virtqueue *vq;
int err;
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
return NULL; return NULL;
...@@ -2202,8 +2481,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2202,8 +2481,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
vq->vq.callback = callback; vq->vq.callback = callback;
vq->vq.vdev = vdev; vq->vq.vdev = vdev;
vq->vq.name = name; vq->vq.name = name;
vq->vq.num_free = vring.num;
vq->vq.index = index; vq->vq.index = index;
vq->vq.reset = false;
vq->we_own_ring = false; vq->we_own_ring = false;
vq->notify = notify; vq->notify = notify;
vq->weak_barriers = weak_barriers; vq->weak_barriers = weak_barriers;
...@@ -2212,14 +2491,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2212,14 +2491,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
#else #else
vq->broken = false; vq->broken = false;
#endif #endif
vq->last_used_idx = 0;
vq->event_triggered = false;
vq->num_added = 0;
vq->use_dma_api = vring_use_dma_api(vdev); vq->use_dma_api = vring_use_dma_api(vdev);
#ifdef DEBUG
vq->in_use = false;
vq->last_add_time_valid = false;
#endif
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
!context; !context;
...@@ -2228,47 +2500,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, ...@@ -2228,47 +2500,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
vq->weak_barriers = false; vq->weak_barriers = false;
vq->split.queue_dma_addr = 0; err = vring_alloc_state_extra_split(vring_split);
vq->split.queue_size_in_bytes = 0; if (err) {
kfree(vq);
vq->split.vring = vring; return NULL;
vq->split.avail_flags_shadow = 0;
vq->split.avail_idx_shadow = 0;
/* No callback? Tell other side not to bother us. */
if (!callback) {
vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
if (!vq->event)
vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
vq->split.avail_flags_shadow);
} }
vq->split.desc_state = kmalloc_array(vring.num, virtqueue_vring_init_split(vring_split, vq);
sizeof(struct vring_desc_state_split), GFP_KERNEL);
if (!vq->split.desc_state)
goto err_state;
vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
if (!vq->split.desc_extra)
goto err_extra;
/* Put everything in free lists. */ virtqueue_init(vq, vring_split->vring.num);
vq->free_head = 0; virtqueue_vring_attach_split(vq, vring_split);
memset(vq->split.desc_state, 0, vring.num *
sizeof(struct vring_desc_state_split));
spin_lock(&vdev->vqs_list_lock); spin_lock(&vdev->vqs_list_lock);
list_add_tail(&vq->vq.list, &vdev->vqs); list_add_tail(&vq->vq.list, &vdev->vqs);
spin_unlock(&vdev->vqs_list_lock); spin_unlock(&vdev->vqs_list_lock);
return &vq->vq; return &vq->vq;
err_extra:
kfree(vq->split.desc_state);
err_state:
kfree(vq);
return NULL;
} }
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
struct virtqueue *vring_create_virtqueue( struct virtqueue *vring_create_virtqueue(
unsigned int index, unsigned int index,
...@@ -2294,6 +2541,75 @@ struct virtqueue *vring_create_virtqueue( ...@@ -2294,6 +2541,75 @@ struct virtqueue *vring_create_virtqueue(
} }
EXPORT_SYMBOL_GPL(vring_create_virtqueue); EXPORT_SYMBOL_GPL(vring_create_virtqueue);
/**
* virtqueue_resize - resize the vring of vq
* @_vq: the struct virtqueue we're talking about.
* @num: new ring num
 * @recycle: callback to recycle buffers that are no longer used
*
 * If a new vring really needs to be created, the current vq is first put
 * into the reset state and the passed callback is invoked to recycle any
 * buffers that are no longer used. The old vring is released only after
 * the new vring has been created successfully.
*
* Caller must ensure we don't call this with other virtqueue operations
* at the same time (except where noted).
*
* Returns zero or a negative error.
* 0: success.
 * -ENOMEM: Failed to allocate a new ring; the original ring is kept and
 *          the vq still works normally.
 * -EBUSY: Failed to re-synchronize with the device; the vq may not work properly.
* -ENOENT: Transport or device not supported
 * -E2BIG/-EINVAL: num is invalid (larger than num_max, or zero)
* -EPERM: Operation not permitted
*
*/
int virtqueue_resize(struct virtqueue *_vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf))
{
struct vring_virtqueue *vq = to_vvq(_vq);
struct virtio_device *vdev = vq->vq.vdev;
void *buf;
int err;
if (!vq->we_own_ring)
return -EPERM;
if (num > vq->vq.num_max)
return -E2BIG;
if (!num)
return -EINVAL;
if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num)
return 0;
if (!vdev->config->disable_vq_and_reset)
return -ENOENT;
if (!vdev->config->enable_vq_after_reset)
return -ENOENT;
err = vdev->config->disable_vq_and_reset(_vq);
if (err)
return err;
while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL)
recycle(_vq, buf);
if (vq->packed_ring)
err = virtqueue_resize_packed(_vq, num);
else
err = virtqueue_resize_split(_vq, num);
if (vdev->config->enable_vq_after_reset(_vq))
return -EBUSY;
return err;
}
EXPORT_SYMBOL_GPL(virtqueue_resize);
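A minimal usage sketch (not part of this patchset) of the new resize API from a driver's point of view; my_shrink_rx_queue(), my_recycle_rx_buf() and the 256-entry target are hypothetical, and the error handling mirrors the return-code contract documented above:

	#include <linux/slab.h>
	#include <linux/virtio.h>

	/* Buffers detached from the old ring are handed back one by one. */
	static void my_recycle_rx_buf(struct virtqueue *vq, void *buf)
	{
		kfree(buf);
	}

	static int my_shrink_rx_queue(struct virtqueue *vq)
	{
		u32 new_num = 256;	/* hypothetical target ring size */
		int err;

		if (new_num > vq->num_max)
			return -EINVAL;

		err = virtqueue_resize(vq, new_num, my_recycle_rx_buf);
		if (err == -ENOMEM) {
			/* Allocation failed: the old ring was kept, keep using it. */
			return 0;
		}
		return err;	/* 0 on success, -EBUSY/-ENOENT/... otherwise */
	}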
/* Only available for split ring */ /* Only available for split ring */
struct virtqueue *vring_new_virtqueue(unsigned int index, struct virtqueue *vring_new_virtqueue(unsigned int index,
unsigned int num, unsigned int num,
...@@ -2306,25 +2622,21 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, ...@@ -2306,25 +2622,21 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name) const char *name)
{ {
struct vring vring; struct vring_virtqueue_split vring_split = {};
if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
return NULL; return NULL;
vring_init(&vring, num, pages, vring_align); vring_init(&vring_split.vring, num, pages, vring_align);
return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers,
notify, callback, name); context, notify, callback, name);
} }
EXPORT_SYMBOL_GPL(vring_new_virtqueue); EXPORT_SYMBOL_GPL(vring_new_virtqueue);
void vring_del_virtqueue(struct virtqueue *_vq) static void vring_free(struct virtqueue *_vq)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
spin_lock(&vq->vq.vdev->vqs_list_lock);
list_del(&_vq->list);
spin_unlock(&vq->vq.vdev->vqs_list_lock);
if (vq->we_own_ring) { if (vq->we_own_ring) {
if (vq->packed_ring) { if (vq->packed_ring) {
vring_free_queue(vq->vq.vdev, vring_free_queue(vq->vq.vdev,
...@@ -2355,6 +2667,18 @@ void vring_del_virtqueue(struct virtqueue *_vq) ...@@ -2355,6 +2667,18 @@ void vring_del_virtqueue(struct virtqueue *_vq)
kfree(vq->split.desc_state); kfree(vq->split.desc_state);
kfree(vq->split.desc_extra); kfree(vq->split.desc_extra);
} }
}
void vring_del_virtqueue(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
spin_lock(&vq->vq.vdev->vqs_list_lock);
list_del(&_vq->list);
spin_unlock(&vq->vq.vdev->vqs_list_lock);
vring_free(_vq);
kfree(vq); kfree(vq);
} }
EXPORT_SYMBOL_GPL(vring_del_virtqueue); EXPORT_SYMBOL_GPL(vring_del_virtqueue);
...@@ -2402,6 +2726,30 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) ...@@ -2402,6 +2726,30 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
} }
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
/*
* This function should only be called by the core, not directly by the driver.
*/
void __virtqueue_break(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
WRITE_ONCE(vq->broken, true);
}
EXPORT_SYMBOL_GPL(__virtqueue_break);
/*
* This function should only be called by the core, not directly by the driver.
*/
void __virtqueue_unbreak(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
WRITE_ONCE(vq->broken, false);
}
EXPORT_SYMBOL_GPL(__virtqueue_unbreak);
bool virtqueue_is_broken(struct virtqueue *_vq) bool virtqueue_is_broken(struct virtqueue *_vq)
{ {
struct vring_virtqueue *vq = to_vvq(_vq); struct vring_virtqueue *vq = to_vvq(_vq);
......
...@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private) ...@@ -131,7 +131,7 @@ static irqreturn_t virtio_vdpa_virtqueue_cb(void *private)
static struct virtqueue * static struct virtqueue *
virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name, bool ctx) const char *name, u32 size, bool ctx)
{ {
struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev); struct virtio_vdpa_device *vd_dev = to_virtio_vdpa_device(vdev);
struct vdpa_device *vdpa = vd_get_vdpa(vdev); struct vdpa_device *vdpa = vd_get_vdpa(vdev);
...@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -168,14 +168,17 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue; goto error_new_virtqueue;
} }
if (!size || size > max_num)
size = max_num;
if (ops->get_vq_num_min) if (ops->get_vq_num_min)
min_num = ops->get_vq_num_min(vdpa); min_num = ops->get_vq_num_min(vdpa);
may_reduce_num = (max_num == min_num) ? false : true; may_reduce_num = (size == min_num) ? false : true;
/* Create the vring */ /* Create the vring */
align = ops->get_vq_align(vdpa); align = ops->get_vq_align(vdpa);
vq = vring_create_virtqueue(index, max_num, align, vdev, vq = vring_create_virtqueue(index, size, align, vdev,
true, may_reduce_num, ctx, true, may_reduce_num, ctx,
virtio_vdpa_notify, callback, name); virtio_vdpa_notify, callback, name);
if (!vq) { if (!vq) {
...@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, ...@@ -183,6 +186,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index,
goto error_new_virtqueue; goto error_new_virtqueue;
} }
vq->num_max = max_num;
/* Setup virtqueue callback */ /* Setup virtqueue callback */
cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL; cb.callback = callback ? virtio_vdpa_virtqueue_cb : NULL;
cb.private = info; cb.private = info;
...@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -267,6 +272,7 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
struct virtqueue *vqs[], struct virtqueue *vqs[],
vq_callback_t *callbacks[], vq_callback_t *callbacks[],
const char * const names[], const char * const names[],
u32 sizes[],
const bool *ctx, const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
...@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs, ...@@ -282,9 +288,9 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
continue; continue;
} }
vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, vqs[i] = virtio_vdpa_setup_vq(vdev, queue_idx++, callbacks[i],
callbacks[i], names[i], ctx ? names[i], sizes ? sizes[i] : 0,
ctx[i] : false); ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) { if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]); err = PTR_ERR(vqs[i]);
goto err_setup_vq; goto err_setup_vq;
......
...@@ -150,6 +150,14 @@ enum { ...@@ -150,6 +150,14 @@ enum {
MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3,
}; };
/* This indicates that the object was not created or has already
* been desroyed. It is very safe to assume that this object will never
* have so many states
*/
enum {
MLX5_VIRTIO_NET_Q_OBJECT_NONE = 0xffffffff
};
enum { enum {
MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0,
MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1,
......
...@@ -597,7 +597,7 @@ struct rproc_subdev { ...@@ -597,7 +597,7 @@ struct rproc_subdev {
/** /**
* struct rproc_vring - remoteproc vring state * struct rproc_vring - remoteproc vring state
* @va: virtual address * @va: virtual address
* @len: length, in bytes * @num: vring size
* @da: device address * @da: device address
* @align: vring alignment * @align: vring alignment
* @notifyid: rproc-specific unique vring index * @notifyid: rproc-specific unique vring index
...@@ -606,7 +606,7 @@ struct rproc_subdev { ...@@ -606,7 +606,7 @@ struct rproc_subdev {
*/ */
struct rproc_vring { struct rproc_vring {
void *va; void *va;
int len; int num;
u32 da; u32 da;
u32 align; u32 align;
int notifyid; int notifyid;
......
...@@ -218,6 +218,9 @@ struct vdpa_map_file { ...@@ -218,6 +218,9 @@ struct vdpa_map_file {
* @reset: Reset device * @reset: Reset device
* @vdev: vdpa device * @vdev: vdpa device
* Returns integer: success (0) or error (< 0) * Returns integer: success (0) or error (< 0)
 * @suspend: Suspend the device (optional)
* @vdev: vdpa device
* Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space includes * @get_config_size: Get the size of the configuration space includes
* fields that are conditional on feature bits. * fields that are conditional on feature bits.
* @vdev: vdpa device * @vdev: vdpa device
...@@ -319,6 +322,7 @@ struct vdpa_config_ops { ...@@ -319,6 +322,7 @@ struct vdpa_config_ops {
u8 (*get_status)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status); void (*set_status)(struct vdpa_device *vdev, u8 status);
int (*reset)(struct vdpa_device *vdev); int (*reset)(struct vdpa_device *vdev);
int (*suspend)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len); void *buf, unsigned int len);
......
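A hypothetical sketch of how a parent driver could wire up the new optional op; my_vdpa_suspend() and the ops table are illustrative only, and the mandatory vdpa_config_ops members are omitted:

	#include <linux/vdpa.h>

	/* Illustrative stub: a real driver would quiesce its virtqueues and
	 * preserve device state here, then return 0 on success or a negative
	 * errno on failure, as documented above. */
	static int my_vdpa_suspend(struct vdpa_device *vdev)
	{
		return 0;
	}

	static const struct vdpa_config_ops my_vdpa_ops = {
		/* mandatory ops elided in this sketch */
		.suspend = my_vdpa_suspend,
	};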
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
* @priv: a pointer for the virtqueue implementation to use. * @priv: a pointer for the virtqueue implementation to use.
* @index: the zero-based ordinal number for this queue. * @index: the zero-based ordinal number for this queue.
* @num_free: number of elements we expect to be able to fit. * @num_free: number of elements we expect to be able to fit.
* @num_max: the maximum number of elements supported by the device.
 * @reset: whether the vq is currently in the reset state.
* *
* A note on @num_free: with indirect buffers, each buffer needs one * A note on @num_free: with indirect buffers, each buffer needs one
* element in the queue, otherwise a buffer will need one element per * element in the queue, otherwise a buffer will need one element per
...@@ -31,7 +33,9 @@ struct virtqueue { ...@@ -31,7 +33,9 @@ struct virtqueue {
struct virtio_device *vdev; struct virtio_device *vdev;
unsigned int index; unsigned int index;
unsigned int num_free; unsigned int num_free;
unsigned int num_max;
void *priv; void *priv;
bool reset;
}; };
int virtqueue_add_outbuf(struct virtqueue *vq, int virtqueue_add_outbuf(struct virtqueue *vq,
...@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq); ...@@ -89,6 +93,9 @@ dma_addr_t virtqueue_get_desc_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_avail_addr(struct virtqueue *vq);
dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq); dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
int virtqueue_resize(struct virtqueue *vq, u32 num,
void (*recycle)(struct virtqueue *vq, void *buf));
/** /**
* virtio_device - representation of a device using virtio * virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus * @index: unique position on the virtio bus
...@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev); ...@@ -133,6 +140,9 @@ bool is_virtio_device(struct device *dev);
void virtio_break_device(struct virtio_device *dev); void virtio_break_device(struct virtio_device *dev);
void __virtio_unbreak_device(struct virtio_device *dev); void __virtio_unbreak_device(struct virtio_device *dev);
void __virtqueue_break(struct virtqueue *_vq);
void __virtqueue_unbreak(struct virtqueue *_vq);
void virtio_config_changed(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev);
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
int virtio_device_freeze(struct virtio_device *dev); int virtio_device_freeze(struct virtio_device *dev);
......
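virtqueue_resize() hands every buffer still pending in the old ring back to the driver through the recycle callback before re-creating the queue with the new size, so the callback must free or re-queue each buffer. A minimal driver-side sketch, assuming a hypothetical my_free_buf() helper and error policy:

#include <linux/virtio.h>

/* Hedged sketch: change the ring size of one virtqueue. my_free_buf()
 * is a made-up helper for dropping a buffer that was still in flight.
 */
static void my_recycle(struct virtqueue *vq, void *buf)
{
        my_free_buf(buf);
}

static int my_set_ring_size(struct virtqueue *vq, u32 num)
{
        int err;

        if (num > vq->num_max)          /* num_max is the device-advertised limit */
                return -EINVAL;

        err = virtqueue_resize(vq, num, my_recycle);
        if (err)
                return err;

        /* For a receive queue, re-populate the now-empty ring here. */
        return 0;
}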
...@@ -55,6 +55,7 @@ struct virtio_shm_region { ...@@ -55,6 +55,7 @@ struct virtio_shm_region {
* include a NULL entry for vqs that do not need a callback * include a NULL entry for vqs that do not need a callback
* names: array of virtqueue names (mainly for debugging) * names: array of virtqueue names (mainly for debugging)
* include a NULL entry for vqs unused by driver * include a NULL entry for vqs unused by driver
* sizes: array of virtqueue sizes
* Returns 0 on success or error status * Returns 0 on success or error status
* @del_vqs: free virtqueues found by find_vqs(). * @del_vqs: free virtqueues found by find_vqs().
* @synchronize_cbs: synchronize with the virtqueue callbacks (optional) * @synchronize_cbs: synchronize with the virtqueue callbacks (optional)
...@@ -78,6 +79,18 @@ struct virtio_shm_region { ...@@ -78,6 +79,18 @@ struct virtio_shm_region {
* @set_vq_affinity: set the affinity for a virtqueue (optional). * @set_vq_affinity: set the affinity for a virtqueue (optional).
* @get_vq_affinity: get the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional).
* @get_shm_region: get a shared memory region based on the index. * @get_shm_region: get a shared memory region based on the index.
* @disable_vq_and_reset: reset a queue individually (optional).
* vq: the virtqueue
* Returns 0 on success or error status
* disable_vq_and_reset will guarantee that the callbacks are disabled and
* synchronized.
 * Except for the callbacks, the caller should guarantee that no virtqueue
 * function accesses the vring while the queue is reset.
 * @enable_vq_after_reset: re-enable a previously reset queue
* vq: the virtqueue
* Returns 0 on success or error status
* If disable_vq_and_reset is set, then enable_vq_after_reset must also be
* set.
*/ */
typedef void vq_callback_t(struct virtqueue *); typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops { struct virtio_config_ops {
...@@ -91,7 +104,9 @@ struct virtio_config_ops { ...@@ -91,7 +104,9 @@ struct virtio_config_ops {
void (*reset)(struct virtio_device *vdev); void (*reset)(struct virtio_device *vdev);
int (*find_vqs)(struct virtio_device *, unsigned nvqs, int (*find_vqs)(struct virtio_device *, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[], struct virtqueue *vqs[], vq_callback_t *callbacks[],
const char * const names[], const bool *ctx, const char * const names[],
u32 sizes[],
const bool *ctx,
struct irq_affinity *desc); struct irq_affinity *desc);
void (*del_vqs)(struct virtio_device *); void (*del_vqs)(struct virtio_device *);
void (*synchronize_cbs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *);
...@@ -104,6 +119,8 @@ struct virtio_config_ops { ...@@ -104,6 +119,8 @@ struct virtio_config_ops {
int index); int index);
bool (*get_shm_region)(struct virtio_device *vdev, bool (*get_shm_region)(struct virtio_device *vdev,
struct virtio_shm_region *region, u8 id); struct virtio_shm_region *region, u8 id);
int (*disable_vq_and_reset)(struct virtqueue *vq);
int (*enable_vq_after_reset)(struct virtqueue *vq);
}; };
/* If driver didn't advertise the feature, it will never appear. */ /* If driver didn't advertise the feature, it will never appear. */
...@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, ...@@ -198,7 +215,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
const char *names[] = { n }; const char *names[] = { n };
struct virtqueue *vq; struct virtqueue *vq;
int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL,
NULL); NULL, NULL);
if (err < 0) if (err < 0)
return ERR_PTR(err); return ERR_PTR(err);
return vq; return vq;
...@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -210,7 +227,8 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const char * const names[],
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
NULL, desc);
} }
static inline static inline
...@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, ...@@ -219,8 +237,20 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs,
const char * const names[], const bool *ctx, const char * const names[], const bool *ctx,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL,
desc); ctx, desc);
}
static inline
int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
u32 sizes[],
const bool *ctx, struct irq_affinity *desc)
{
return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
ctx, desc);
} }
/** /**
......
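The extra sizes[] argument lets a driver ask the transport for specific ring sizes at find_vqs() time; passing NULL keeps the previous behaviour of using the device maximum. A sketch of a driver using the new virtio_find_vqs_ctx_size() helper, with illustrative queue names, callbacks and sizes:

#include <linux/virtio_config.h>

/* Hedged sketch: request two 256-entry queues. The callbacks and names
 * are assumptions made for the example.
 */
static void my_rx_done(struct virtqueue *vq);
static void my_tx_done(struct virtqueue *vq);

static int my_setup_vqs(struct virtio_device *vdev, struct virtqueue *vqs[2])
{
        vq_callback_t *callbacks[] = { my_rx_done, my_tx_done };
        static const char * const names[] = { "rx", "tx" };
        u32 sizes[] = { 256, 256 };

        return virtio_find_vqs_ctx_size(vdev, 2, vqs, callbacks, names,
                                        sizes, NULL, NULL);
}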
...@@ -5,6 +5,13 @@ ...@@ -5,6 +5,13 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/virtio_pci.h> #include <linux/virtio_pci.h>
struct virtio_pci_modern_common_cfg {
struct virtio_pci_common_cfg cfg;
__le16 queue_notify_data; /* read-write */
__le16 queue_reset; /* read-write */
};
struct virtio_pci_modern_device { struct virtio_pci_modern_device {
struct pci_dev *pci_dev; struct pci_dev *pci_dev;
...@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev, ...@@ -106,4 +113,6 @@ void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
u16 index, resource_size_t *pa); u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev); int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev); void vp_modern_remove(struct virtio_pci_modern_device *mdev);
int vp_modern_get_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
void vp_modern_set_queue_reset(struct virtio_pci_modern_device *mdev, u16 index);
#endif #endif
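queue_reset is the per-queue register behind VIRTIO_F_RING_RESET: the driver writes 1 to request a reset and the field reads back 1 once the device has completed it. A hedged sketch of how a transport might drive the handshake with the two new helpers (vring teardown, re-creation and locking are deliberately omitted):

#include <linux/errno.h>
#include <linux/virtio_pci_modern.h>

/* Hedged sketch: only the register handshake is shown; a real transport
 * must also detach and later re-attach the driver's vring state.
 */
static int my_reset_one_queue(struct virtio_pci_modern_device *mdev, u16 index)
{
        /* Ask the device to reset this queue. */
        vp_modern_set_queue_reset(mdev, index);

        /* After a successful reset the field reads back 1. */
        if (vp_modern_get_queue_reset(mdev, index) != 1)
                return -EBUSY;

        return 0;
}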
...@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, ...@@ -76,16 +76,6 @@ struct virtqueue *vring_create_virtqueue(unsigned int index,
void (*callback)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq),
const char *name); const char *name);
/* Creates a virtqueue with a custom layout. */
struct virtqueue *__vring_new_virtqueue(unsigned int index,
struct vring vring,
struct virtio_device *vdev,
bool weak_barriers,
bool ctx,
bool (*notify)(struct virtqueue *),
void (*callback)(struct virtqueue *),
const char *name);
/* /*
* Creates a virtqueue with a standard layout but a caller-allocated * Creates a virtqueue with a standard layout but a caller-allocated
* ring. * ring.
......
...@@ -210,6 +210,53 @@ struct vduse_vq_eventfd { ...@@ -210,6 +210,53 @@ struct vduse_vq_eventfd {
*/ */
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) #define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
/**
* struct vduse_iova_umem - userspace memory configuration for one IOVA region
 * @uaddr: start address of the userspace memory; it must be page-aligned
* @iova: start of the IOVA region
* @size: size of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_REG_UMEM and VDUSE_IOTLB_DEREG_UMEM
* ioctls to register/de-register userspace memory for IOVA regions
*/
struct vduse_iova_umem {
__u64 uaddr;
__u64 iova;
__u64 size;
__u64 reserved[3];
};
/* Register userspace memory for IOVA regions */
#define VDUSE_IOTLB_REG_UMEM _IOW(VDUSE_BASE, 0x18, struct vduse_iova_umem)
/* De-register the userspace memory. Caller should set the iova and size fields. */
#define VDUSE_IOTLB_DEREG_UMEM _IOW(VDUSE_BASE, 0x19, struct vduse_iova_umem)
/**
* struct vduse_iova_info - information of one IOVA region
* @start: start of the IOVA region
 * @last: last address of the IOVA region
 * @capability: capability flags of the IOVA region
* @reserved: for future use, needs to be initialized to zero
*
* Structure used by VDUSE_IOTLB_GET_INFO ioctl to get information of
* one IOVA region.
*/
struct vduse_iova_info {
__u64 start;
__u64 last;
#define VDUSE_IOVA_CAP_UMEM (1 << 0)
__u64 capability;
__u64 reserved[3];
};
/*
* Find the first IOVA region that overlaps with the range [start, last]
 * and return information about it. Caller should set the start and last fields.
*/
#define VDUSE_IOTLB_GET_INFO _IOWR(VDUSE_BASE, 0x1a, struct vduse_iova_info)
/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ /* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
/** /**
......
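From a VDUSE daemon the new ioctls are typically used in pairs: query which IOVA region the device uses and whether it accepts userspace bounce pages (VDUSE_IOVA_CAP_UMEM), then register a page-aligned buffer covering that region. A userspace sketch under those assumptions; fd is an already-open /dev/vduse/$NAME descriptor and buf a sufficiently large page-aligned allocation:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vduse.h>

/* Hedged sketch: register userspace memory for the first IOVA region.
 * "fd" and "buf" are assumed to be set up elsewhere.
 */
static int register_umem(int fd, void *buf)
{
        struct vduse_iova_info info;
        struct vduse_iova_umem umem;

        memset(&info, 0, sizeof(info));
        info.start = 0;
        info.last = ~0ULL;                      /* search the whole IOVA space */
        if (ioctl(fd, VDUSE_IOTLB_GET_INFO, &info))
                return -1;
        if (!(info.capability & VDUSE_IOVA_CAP_UMEM))
                return -1;                      /* region not backed by userspace memory */

        memset(&umem, 0, sizeof(umem));         /* reserved[] must stay zero */
        umem.uaddr = (uint64_t)(uintptr_t)buf;  /* page-aligned buffer */
        umem.iova = info.start;
        umem.size = info.last - info.start + 1; /* cover the whole region (assumed requirement) */
        return ioctl(fd, VDUSE_IOTLB_REG_UMEM, &umem);
}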
...@@ -171,4 +171,13 @@ ...@@ -171,4 +171,13 @@
#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ #define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
struct vhost_vring_state) struct vhost_vring_state)
/* Suspend a device so it does not process virtqueue requests anymore
*
 * After this ioctl returns, the device must preserve all the state required
 * to resume it later (the virtqueue vring base plus any device-specific
 * state). The device must not change its configuration after that point.
*/
#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
#endif #endif
...@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range { ...@@ -161,5 +161,7 @@ struct vhost_vdpa_iova_range {
* message * message
*/ */
#define VHOST_BACKEND_F_IOTLB_ASID 0x3 #define VHOST_BACKEND_F_IOTLB_ASID 0x3
/* Device can be suspended */
#define VHOST_BACKEND_F_SUSPEND 0x4
#endif #endif
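Userspace should gate the new ioctl on the backend feature bit; a sketch against an already-configured vhost-vdpa file descriptor (feature negotiation and error handling are reduced to the minimum):

#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Hedged sketch: suspend a vhost-vdpa device only when the backend
 * advertises VHOST_BACKEND_F_SUSPEND. "fd" is an already configured
 * /dev/vhost-vdpa-N descriptor (assumption).
 */
static int suspend_vdpa(int fd)
{
        unsigned long long features = 0;

        if (ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features))
                return -1;
        if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
                return -1;      /* backend cannot be suspended */

        /* After this returns, the device stops processing virtqueue
         * requests but keeps the state needed to resume later. */
        return ioctl(fd, VHOST_VDPA_SUSPEND);
}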
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
* rest are per-device feature bits. * rest are per-device feature bits.
*/ */
#define VIRTIO_TRANSPORT_F_START 28 #define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 38 #define VIRTIO_TRANSPORT_F_END 41
#ifndef VIRTIO_CONFIG_NO_LEGACY #ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've /* Do we get callbacks when the ring is completely used, even if we've
...@@ -98,4 +98,9 @@ ...@@ -98,4 +98,9 @@
* Does the device support Single Root I/O Virtualization? * Does the device support Single Root I/O Virtualization?
*/ */
#define VIRTIO_F_SR_IOV 37 #define VIRTIO_F_SR_IOV 37
/*
* This feature indicates that the driver can reset a queue individually.
*/
#define VIRTIO_F_RING_RESET 40
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow
* Steering */ * Steering */
#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
...@@ -355,4 +355,36 @@ struct virtio_net_hash_config { ...@@ -355,4 +355,36 @@ struct virtio_net_hash_config {
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5
#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0
/*
* Control notifications coalescing.
*
* Request the device to change the notifications coalescing parameters.
*
* Available with the VIRTIO_NET_F_NOTF_COAL feature bit.
*/
#define VIRTIO_NET_CTRL_NOTF_COAL 6
/*
 * Set the tx-usecs/tx-max-packets parameters.
* tx-usecs - Maximum number of usecs to delay a TX notification.
* tx-max-packets - Maximum number of packets to send before a TX notification.
*/
struct virtio_net_ctrl_coal_tx {
__le32 tx_max_packets;
__le32 tx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0
/*
 * Set the rx-usecs/rx-max-packets parameters.
 * rx-usecs - Maximum number of usecs to delay a RX notification.
 * rx-max-packets - Maximum number of packets to receive before a RX notification.
*/
struct virtio_net_ctrl_coal_rx {
__le32 rx_max_packets;
__le32 rx_usecs;
};
#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1
#endif /* _UAPI_LINUX_VIRTIO_NET_H */ #endif /* _UAPI_LINUX_VIRTIO_NET_H */
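The coalescing structures are carried as the payload of a control-virtqueue command of class VIRTIO_NET_CTRL_NOTF_COAL. A sketch of building the TX variant; my_send_ctrl_cmd() and struct my_netdev stand in for whatever mechanism the driver uses to post control commands and are not part of this diff:

#include <linux/virtio_net.h>

/* Hedged sketch: my_send_ctrl_cmd() and struct my_netdev are placeholders. */
static int my_send_ctrl_cmd(struct my_netdev *dev, u8 class, u8 cmd,
                            void *data, size_t len);

static int my_set_tx_coalescing(struct my_netdev *dev, u32 usecs, u32 max_packets)
{
        struct virtio_net_ctrl_coal_tx coal = {
                .tx_usecs       = cpu_to_le32(usecs),
                .tx_max_packets = cpu_to_le32(max_packets),
        };

        /* Only legal after VIRTIO_NET_F_NOTF_COAL has been negotiated. */
        return my_send_ctrl_cmd(dev, VIRTIO_NET_CTRL_NOTF_COAL,
                                VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
                                &coal, sizeof(coal));
}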
...@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap { ...@@ -202,6 +202,8 @@ struct virtio_pci_cfg_cap {
#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 #define VIRTIO_PCI_COMMON_Q_AVAILHI 44
#define VIRTIO_PCI_COMMON_Q_USEDLO 48 #define VIRTIO_PCI_COMMON_Q_USEDLO 48
#define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_USEDHI 52
#define VIRTIO_PCI_COMMON_Q_NDATA 56
#define VIRTIO_PCI_COMMON_Q_RESET 58
#endif /* VIRTIO_PCI_NO_MODERN */ #endif /* VIRTIO_PCI_NO_MODERN */
......
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#define READ 0 #define READ 0
#define WRITE 1 #define WRITE 1
typedef unsigned long long phys_addr_t;
typedef unsigned long long dma_addr_t; typedef unsigned long long dma_addr_t;
typedef size_t __kernel_size_t; typedef size_t __kernel_size_t;
typedef unsigned int __wsum; typedef unsigned int __wsum;
...@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t ...@@ -136,6 +135,7 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
#endif #endif
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define dev_warn_once(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
#define min(x, y) ({ \ #define min(x, y) ({ \
typeof(x) _min1 = (x); \ typeof(x) _min1 = (x); \
......
#include <limits.h>
#include "../../../include/linux/vringh.h" #include "../../../include/linux/vringh.h"
...@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev) ...@@ -102,8 +102,8 @@ static void vq_reset(struct vq_info *info, int num, struct virtio_device *vdev)
memset(info->ring, 0, vring_size(num, 4096)); memset(info->ring, 0, vring_size(num, 4096));
vring_init(&info->vring, num, info->ring, 4096); vring_init(&info->vring, num, info->ring, 4096);
info->vq = __vring_new_virtqueue(info->idx, info->vring, vdev, true, info->vq = vring_new_virtqueue(info->idx, num, 4096, vdev, true, false,
false, vq_notify, vq_callback, "test"); info->ring, vq_notify, vq_callback, "test");
assert(info->vq); assert(info->vq);
info->vq->priv = info; info->vq->priv = info;
} }
......