Commit 3fdd47c3 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost fixes from Michael Tsirkin:
 "Some last minute vhost,vdpa fixes.

  The last two of them haven't been in next but they do seem kind of
  obvious, very small and safe, fix bugs reported in the field, and they
  are both in a new mlx5 vdpa driver, so it's not like we can introduce
  regressions"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vdpa/mlx5: Fix dependency on MLX5_CORE
  vdpa/mlx5: should keep avail_index despite device status
  vhost-vdpa: fix page pinning leakage in error path
  vhost-vdpa: fix vhost_vdpa_map() on error condition
  vhost: Don't call log_access_ok() when using IOTLB
  vhost: Use vhost_get_used_size() in vhost_vring_set_addr()
  vhost: Don't call access_ok() when using IOTLB
  vhost vdpa: fix vhost_vdpa_open error handling
parents 6288c1d8 aff90770
...@@ -30,9 +30,7 @@ config IFCVF ...@@ -30,9 +30,7 @@ config IFCVF
be called ifcvf. be called ifcvf.
config MLX5_VDPA config MLX5_VDPA
bool "MLX5 VDPA support library for ConnectX devices" bool
depends on MLX5_CORE
default n
help help
Support library for Mellanox VDPA drivers. Provides code that is Support library for Mellanox VDPA drivers. Provides code that is
common for all types of VDPA drivers. The following drivers are planned: common for all types of VDPA drivers. The following drivers are planned:
...@@ -40,7 +38,8 @@ config MLX5_VDPA ...@@ -40,7 +38,8 @@ config MLX5_VDPA
config MLX5_VDPA_NET config MLX5_VDPA_NET
tristate "vDPA driver for ConnectX devices" tristate "vDPA driver for ConnectX devices"
depends on MLX5_VDPA select MLX5_VDPA
depends on MLX5_CORE
default n default n
help help
VDPA network driver for ConnectX6 and newer. Provides offloading VDPA network driver for ConnectX6 and newer. Provides offloading
......
...@@ -1133,15 +1133,17 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m ...@@ -1133,15 +1133,17 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
if (!mvq->initialized) if (!mvq->initialized)
return; return;
if (query_virtqueue(ndev, mvq, &attr)) {
mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
return;
}
if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
return; return;
if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
if (query_virtqueue(ndev, mvq, &attr)) {
mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
return;
}
mvq->avail_idx = attr.available_index;
} }
static void suspend_vqs(struct mlx5_vdpa_net *ndev) static void suspend_vqs(struct mlx5_vdpa_net *ndev)
...@@ -1411,8 +1413,14 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa ...@@ -1411,8 +1413,14 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
struct mlx5_virtq_attr attr; struct mlx5_virtq_attr attr;
int err; int err;
if (!mvq->initialized) /* If the virtq object was destroyed, use the value saved at
return -EAGAIN; * the last minute of suspend_vq. This caters for userspace
* that cares about emulating the index after vq is stopped.
*/
if (!mvq->initialized) {
state->avail_index = mvq->avail_idx;
return 0;
}
err = query_virtqueue(ndev, mvq, &attr); err = query_virtqueue(ndev, mvq, &attr);
if (err) { if (err) {
......
...@@ -565,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, ...@@ -565,6 +565,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
perm_to_iommu_flags(perm)); perm_to_iommu_flags(perm));
} }
if (r)
vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
return r; return r;
} }
...@@ -592,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, ...@@ -592,21 +595,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
struct vhost_dev *dev = &v->vdev; struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list; struct page **page_list;
unsigned long list_size = PAGE_SIZE / sizeof(struct page *); struct vm_area_struct **vmas;
unsigned int gup_flags = FOLL_LONGTERM; unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0; unsigned long map_pfn, last_pfn = 0;
unsigned long locked, lock_limit, pinned, i; unsigned long npages, lock_limit;
unsigned long i, nmap = 0;
u64 iova = msg->iova; u64 iova = msg->iova;
long pinned;
int ret = 0; int ret = 0;
if (vhost_iotlb_itree_first(iotlb, msg->iova, if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1)) msg->iova + msg->size - 1))
return -EEXIST; return -EEXIST;
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
if (msg->perm & VHOST_ACCESS_WO) if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE; gup_flags |= FOLL_WRITE;
...@@ -614,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, ...@@ -614,61 +615,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (!npages) if (!npages)
return -EINVAL; return -EINVAL;
page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!page_list || !vmas) {
ret = -ENOMEM;
goto free;
}
mmap_read_lock(dev->mm); mmap_read_lock(dev->mm);
locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
if (locked > lock_limit) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto unlock;
} }
cur_base = msg->uaddr & PAGE_MASK; pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
iova &= PAGE_MASK; page_list, vmas);
if (npages != pinned) {
if (pinned < 0) {
ret = pinned;
} else {
unpin_user_pages(page_list, pinned);
ret = -ENOMEM;
}
goto unlock;
}
while (npages) { iova &= PAGE_MASK;
pinned = min_t(unsigned long, npages, list_size); map_pfn = page_to_pfn(page_list[0]);
ret = pin_user_pages(cur_base, pinned,
gup_flags, page_list, NULL); /* One more iteration to avoid extra vdpa_map() call out of loop. */
if (ret != pinned) for (i = 0; i <= npages; i++) {
goto out; unsigned long this_pfn;
u64 csize;
if (!last_pfn)
map_pfn = page_to_pfn(page_list[0]); /* The last chunk may have no valid PFN next to it */
this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
for (i = 0; i < ret; i++) {
unsigned long this_pfn = page_to_pfn(page_list[i]); if (last_pfn && (this_pfn == -1UL ||
u64 csize; this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
if (last_pfn && (this_pfn != last_pfn + 1)) { csize = last_pfn - map_pfn + 1;
/* Pin a contiguous chunk of memory */ ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; map_pfn << PAGE_SHIFT,
if (vhost_vdpa_map(v, iova, csize, msg->perm);
map_pfn << PAGE_SHIFT, if (ret) {
msg->perm)) /*
goto out; * Unpin the rest chunks of memory on the
map_pfn = this_pfn; * flight with no corresponding vdpa_map()
iova += csize; * calls having been made yet. On the other
* hand, vdpa_unmap() in the failure path
* is in charge of accounting the number of
* pinned pages for its own.
* This asymmetrical pattern of accounting
* is for efficiency to pin all pages at
* once, while there is no other callsite
* of vdpa_map() than here above.
*/
unpin_user_pages(&page_list[nmap],
npages - nmap);
goto out;
} }
atomic64_add(csize, &dev->mm->pinned_vm);
last_pfn = this_pfn; nmap += csize;
iova += csize << PAGE_SHIFT;
map_pfn = this_pfn;
} }
last_pfn = this_pfn;
cur_base += ret << PAGE_SHIFT;
npages -= ret;
} }
/* Pin the rest chunk */ WARN_ON(nmap != npages);
ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
map_pfn << PAGE_SHIFT, msg->perm);
out: out:
if (ret) { if (ret)
vhost_vdpa_unmap(v, msg->iova, msg->size); vhost_vdpa_unmap(v, msg->iova, msg->size);
atomic64_sub(npages, &dev->mm->pinned_vm); unlock:
}
mmap_read_unlock(dev->mm); mmap_read_unlock(dev->mm);
free_page((unsigned long)page_list); free:
kvfree(vmas);
kvfree(page_list);
return ret; return ret;
} }
...@@ -810,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) ...@@ -810,6 +836,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
err_init_iotlb: err_init_iotlb:
vhost_dev_cleanup(&v->vdev); vhost_dev_cleanup(&v->vdev);
kfree(vqs);
err: err:
atomic_dec(&v->opened); atomic_dec(&v->opened);
return r; return r;
......
...@@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, ...@@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
vring_used_t __user *used) vring_used_t __user *used)
{ {
/* If an IOTLB device is present, the vring addresses are
* GIOVAs. Access validation occurs at prefetch time. */
if (vq->iotlb)
return true;
return access_ok(desc, vhost_get_desc_size(vq, num)) && return access_ok(desc, vhost_get_desc_size(vq, num)) &&
access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) &&
access_ok(used, vhost_get_used_size(vq, num)); access_ok(used, vhost_get_used_size(vq, num));
...@@ -1365,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev) ...@@ -1365,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev)
} }
EXPORT_SYMBOL_GPL(vhost_log_access_ok); EXPORT_SYMBOL_GPL(vhost_log_access_ok);
/*
 * Decide whether the used-ring log address of a virtqueue passes validation.
 *
 * @vq:       virtqueue being checked; vq->iotlb and vq->num are read
 * @log_base: userspace log base, forwarded to log_access_ok()
 * @log_used: whether logging of the used ring is requested
 * @log_addr: log address, forwarded to log_access_ok()
 *
 * Returns true when vq->iotlb is set, when @log_used is false, or when
 * log_access_ok() accepts @log_addr with a length of
 * vhost_get_used_size(vq, vq->num); returns false otherwise.
 */
static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
void __user *log_base,
bool log_used,
u64 log_addr)
{
/* If an IOTLB device is present, log_addr is a GIOVA that
* will never be logged by log_used(). */
if (vq->iotlb)
return true;
/* Without an IOTLB, the check only applies when logging is requested. */
return !log_used || log_access_ok(log_base, log_addr,
vhost_get_used_size(vq, vq->num));
}
/* Verify access for write logging. */ /* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */ /* Caller should have vq mutex and device mutex */
static bool vq_log_access_ok(struct vhost_virtqueue *vq, static bool vq_log_access_ok(struct vhost_virtqueue *vq,
...@@ -1372,8 +1391,7 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq, ...@@ -1372,8 +1391,7 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq,
{ {
return vq_memory_access_ok(log_base, vq->umem, return vq_memory_access_ok(log_base, vq->umem,
vhost_has_feature(vq, VHOST_F_LOG_ALL)) && vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr, vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
vhost_get_used_size(vq, vq->num)));
} }
/* Can we start vq? */ /* Can we start vq? */
...@@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) ...@@ -1383,10 +1401,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
if (!vq_log_access_ok(vq, vq->log_base)) if (!vq_log_access_ok(vq, vq->log_base))
return false; return false;
/* Access validation occurs at prefetch time with IOTLB */
if (vq->iotlb)
return true;
return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
} }
EXPORT_SYMBOL_GPL(vhost_vq_access_ok); EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
...@@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d, ...@@ -1516,10 +1530,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d,
return -EINVAL; return -EINVAL;
/* Also validate log access for used ring if enabled. */ /* Also validate log access for used ring if enabled. */
if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && if (!vq_log_used_access_ok(vq, vq->log_base,
!log_access_ok(vq->log_base, a.log_guest_addr, a.flags & (0x1 << VHOST_VRING_F_LOG),
sizeof *vq->used + a.log_guest_addr))
vq->num * sizeof *vq->used->ring))
return -EINVAL; return -EINVAL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment