Commit 4e8c0c8c authored by David Vrabel's avatar David Vrabel

xen/privcmd: improve performance of MMAPBATCH_V2

Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, despite the pages
being non-consecutive GFNs.

xen_remap_foreign_mfn_array() is added which maps an array of GFNs
(instead of a consecutive range of GFNs).

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.

Migrate times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before     After
  real  0m38.179s  0m26.868s
  user  0m15.096s  0m13.652s
  sys   0m28.988s  0m18.732s
Signed-off-by: default avatarDavid Vrabel <david.vrabel@citrix.com>
Reviewed-by: default avatarStefano Stabellini <stefano.stabellini@eu.citrix.com>
parent 628c28ee
......@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
static __read_mostly int xen_events_irq = -1;
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
{
return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
/* Not used by XENFEAT_auto_translated guests. */
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
......
......@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
#define REMAP_BATCH_SIZE 16
struct remap_data {
unsigned long mfn;
xen_pfn_t *mfn;
bool contiguous;
pgprot_t prot;
struct mmu_update *mmu_update;
};
......@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
/* If we have a contigious range, just update the mfn itself,
else update pointer to be "next mfn". */
if (rmd->contiguous)
(*rmd->mfn)++;
else
rmd->mfn++;
rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
rmd->mmu_update->val = pte_val_ma(pte);
......@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
return 0;
}
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
static int do_remap_mfn(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
{
int err = 0;
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
int batch;
unsigned long range;
int err = 0;
int mapped = 0;
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
/* We need to update the local page tables and the xen HAP */
return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
domid, pages);
return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
prot, domid, pages);
#else
return -EINVAL;
#endif
......@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
rmd.mfn = mfn;
rmd.prot = prot;
/* We use the err_ptr to indicate if there we are doing a contigious
* mapping or a discontigious mapping. */
rmd.contiguous = !err_ptr;
while (nr) {
batch = min(REMAP_BATCH_SIZE, nr);
int index = 0;
int done = 0;
int batch = min(REMAP_BATCH_SIZE, nr);
int batch_left = batch;
range = (unsigned long)batch << PAGE_SHIFT;
rmd.mmu_update = mmu_update;
......@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
if (err)
goto out;
err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
if (err < 0)
goto out;
/* We record the error for each page that gives an error, but
* continue mapping until the whole set is done */
do {
int i;
err = HYPERVISOR_mmu_update(&mmu_update[index],
batch_left, &done, domid);
/*
* @err_ptr may be the same buffer as @mfn, so
* only clear it after each chunk of @mfn is
* used.
*/
if (err_ptr) {
for (i = index; i < index + done; i++)
err_ptr[i] = 0;
}
if (err < 0) {
if (!err_ptr)
goto out;
err_ptr[i] = err;
done++; /* Skip failed frame. */
} else
mapped += done;
batch_left -= done;
index += done;
} while (batch_left);
nr -= batch;
addr += range;
if (err_ptr)
err_ptr += batch;
}
err = 0;
out:
xen_flush_tlb_all();
return err;
return err < 0 ? err : mapped;
}
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages)
{
return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid, struct page **pages)
{
/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
* and the consequences later is quite hard to detect what the actual
* cause of "wrong memory was mapped in".
*/
BUG_ON(err_ptr == NULL);
return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
/* Returns: 0 success */
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages)
......
......@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
return ret;
}
/*
* Similar to traverse_pages, but use each page as a "block" of
* data to be processed as one unit.
*/
static int traverse_pages_block(unsigned nelem, size_t size,
struct list_head *pos,
int (*fn)(void *data, int nr, void *state),
void *state)
{
void *pagedata;
unsigned pageidx;
int ret = 0;
BUG_ON(size > PAGE_SIZE);
pageidx = PAGE_SIZE;
while (nelem) {
int nr = (PAGE_SIZE/size);
struct page *page;
if (nr > nelem)
nr = nelem;
pos = pos->next;
page = list_entry(pos, struct page, lru);
pagedata = page_address(page);
ret = (*fn)(pagedata, nr, state);
if (ret)
break;
nelem -= nr;
}
return ret;
}
struct mmap_mfn_state {
unsigned long va;
struct vm_area_struct *vma;
......@@ -274,39 +308,25 @@ struct mmap_batch_state {
/* auto translated dom0 note: if domU being created is PV, then mfn is
* mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
*/
static int mmap_batch_fn(void *data, void *state)
static int mmap_batch_fn(void *data, int nr, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
struct vm_area_struct *vma = st->vma;
struct page **pages = vma->vm_private_data;
struct page *cur_page = NULL;
struct page **cur_pages = NULL;
int ret;
if (xen_feature(XENFEAT_auto_translated_physmap))
cur_page = pages[st->index++];
cur_pages = &pages[st->index];
ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
st->vma->vm_page_prot, st->domain,
&cur_page);
BUG_ON(nr < 0);
ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
(int *)mfnp, st->vma->vm_page_prot,
st->domain, cur_pages);
/* Store error code for second pass. */
if (st->version == 1) {
if (ret < 0) {
/*
* V1 encodes the error codes in the 32bit top nibble of the
* mfn (with its known limitations vis-a-vis 64 bit callers).
*/
*mfnp |= (ret == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
}
} else { /* st->version == 2 */
*((int *) mfnp) = ret;
}
/* And see if it affects the global_error. */
if (ret < 0) {
/* Adjust the global_error? */
if (ret != nr) {
if (ret == -ENOENT)
st->global_error = -ENOENT;
else {
......@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
st->global_error = 1;
}
}
st->va += PAGE_SIZE;
st->va += PAGE_SIZE * nr;
st->index += nr;
return 0;
}
static int mmap_return_errors(void *data, void *state)
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
struct mmap_batch_state *st = state;
int ret;
if (st->version == 1) {
xen_pfn_t mfnp = *((xen_pfn_t *) data);
if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
return __put_user(mfnp, st->user_mfn++);
else
if (err) {
xen_pfn_t mfn;
ret = get_user(mfn, st->user_mfn);
if (ret < 0)
return ret;
/*
* V1 encodes the error codes in the 32bit top
* nibble of the mfn (with its known
* limitations vis-a-vis 64 bit callers).
*/
mfn |= (err == -ENOENT) ?
PRIVCMD_MMAPBATCH_PAGED_ERROR :
PRIVCMD_MMAPBATCH_MFN_ERROR;
return __put_user(mfn, st->user_mfn++);
} else
st->user_mfn++;
} else { /* st->version == 2 */
int err = *((int *) data);
if (err)
return __put_user(err, st->user_err++);
else
......@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
return 0;
}
static int mmap_return_errors(void *data, int nr, void *state)
{
struct mmap_batch_state *st = state;
int *errs = data;
int i;
int ret;
for (i = 0; i < nr; i++) {
ret = mmap_return_error(errs[i], st);
if (ret < 0)
return ret;
}
return 0;
}
/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
* the vma with the page info to use later.
* Returns: 0 if success, otherwise -errno
......@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
state.version = version;
/* mmap_batch_fn guarantees ret == 0 */
BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state));
BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_batch_fn, &state));
up_write(&mm->mmap_sem);
......@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
/* Write back errors in second pass. */
state.user_mfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
ret = traverse_pages(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
} else
ret = 0;
......
......@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
}
struct remap_data {
xen_pfn_t fgmfn; /* foreign domain's gmfn */
xen_pfn_t *fgmfn; /* foreign domain's gmfn */
pgprot_t prot;
domid_t domid;
struct vm_area_struct *vma;
int index;
struct page **pages;
struct xen_remap_mfn_info *info;
int *err_ptr;
int mapped;
};
static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
......@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
int rc;
rc = map_foreign_page(pfn, info->fgmfn, info->domid);
if (rc < 0)
return rc;
set_pte_at(info->vma->vm_mm, addr, ptep, pte);
rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
*info->err_ptr++ = rc;
if (!rc) {
set_pte_at(info->vma->vm_mm, addr, ptep, pte);
info->mapped++;
}
info->fgmfn++;
return 0;
}
int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
xen_pfn_t *mfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages)
{
int err;
struct remap_data data;
unsigned long range = nr << PAGE_SHIFT;
/* TBD: Batching, current sole caller only does page at a time */
if (nr > 1)
return -EINVAL;
/* Kept here for the purpose of making sure code doesn't break
x86 PVOPS */
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
data.fgmfn = gfn;
data.prot = prot;
data.fgmfn = mfn;
data.prot = prot;
data.domid = domid;
data.vma = vma;
data.index = 0;
data.vma = vma;
data.pages = pages;
err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
data.index = 0;
data.err_ptr = err_ptr;
data.mapped = 0;
err = apply_to_page_range(vma->vm_mm, addr, range,
remap_pte_fn, &data);
return err;
return err < 0 ? err : data.mapped;
}
EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
int nr, struct page **pages)
......
......@@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
struct vm_area_struct;
/*
* xen_remap_domain_mfn_array() - map an array of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: Array of GFNs to map
* @nr: Number entries in the GFN array
* @err_ptr: Returns per-GFN error status.
* @prot: page protection mask
* @domid: Domain owning the pages
* @pages: Array of pages if this domain has an auto-translated physmap
*
* @gfn and @err_ptr may point to the same buffer, the GFNs will be
* overwritten by the error codes after they are mapped.
*
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
/* xen_remap_domain_mfn_range() - map a range of foreign frames
* @vma: VMA to map the pages into
* @addr: Address at which to map the pages
* @gfn: First GFN to map.
* @nr: Number frames to map
* @prot: page protection mask
* @domid: Domain owning the pages
* @pages: Array of pages if this domain has an auto-translated physmap
*
* Returns the number of successfully mapped frames, or a -ve error
* code.
*/
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t mfn, int nr,
xen_pfn_t gfn, int nr,
pgprot_t prot, unsigned domid,
struct page **pages);
int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
int numpgs, struct page **pages);
int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t gfn, int nr,
pgprot_t prot,
xen_pfn_t *gfn, int nr,
int *err_ptr, pgprot_t prot,
unsigned domid,
struct page **pages);
int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment