Commit 0bb599fd authored by David Vrabel's avatar David Vrabel

xen: remove scratch frames for ballooned pages and m2p override

The scratch frame mappings for ballooned pages and the m2p override
are broken.  Remove them in preparation for replacing them with
simpler mechanisms that works.

The scratch pages did not ensure that the page was not in use.  In
particular, the foreign page could still be in use by hardware.  If
the guest reused the frame the hardware could read or write that
frame.

The m2p override did not handle the same frame being granted by two
different grant references.  Trying an M2P override lookup in this
case is impossible.

With the m2p override removed, the grant map/unmap for the kernel
mappings (for x86 PV) can be easily batched in
set_foreign_p2m_mapping() and clear_foreign_p2m_mapping().
Signed-off-by: default avatarDavid Vrabel <david.vrabel@citrix.com>
Reviewed-by: default avatarStefano Stabellini <stefano.stabellini@eu.citrix.com>
parent 853d0289
......@@ -57,7 +57,6 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_unmap_grant_ref *kunmap_ops,
struct page **pages, unsigned int count);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
/*
* Helper functions to write or read unsigned long values to/from
......@@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
return mfn;
pfn = mfn_to_pfn_no_overrides(mfn);
if (__pfn_to_mfn(pfn) != mfn) {
/*
* If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override
* table to see if there's a better pfn to use.
*
* m2p_find_override_pfn returns ~0 if it doesn't find anything.
*/
pfn = m2p_find_override_pfn(mfn, ~0);
}
if (__pfn_to_mfn(pfn) != mfn)
pfn = ~0;
/*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
* entry doesn't map back to the mfn and m2p_override doesn't have a
* valid entry for it.
* pfn is ~0 if there are no entries in the m2p for mfn or the
* entry doesn't map back to the mfn.
*/
if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
pfn = mfn;
......
......@@ -84,8 +84,6 @@
#define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
static void __init m2p_override_init(void);
unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
unsigned long xen_p2m_size __read_mostly;
......@@ -402,8 +400,6 @@ void __init xen_vmalloc_p2m_tree(void)
xen_p2m_size = xen_max_p2m_pfn;
xen_inv_extra_mem();
m2p_override_init();
}
unsigned long get_phys_to_machine(unsigned long pfn)
......@@ -652,100 +648,21 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
return true;
}
#define M2P_OVERRIDE_HASH_SHIFT 10
#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT)
static struct list_head *m2p_overrides;
static DEFINE_SPINLOCK(m2p_override_lock);
static void __init m2p_override_init(void)
{
unsigned i;
m2p_overrides = alloc_bootmem_align(
sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
sizeof(unsigned long));
for (i = 0; i < M2P_OVERRIDE_HASH; i++)
INIT_LIST_HEAD(&m2p_overrides[i]);
}
static unsigned long mfn_hash(unsigned long mfn)
{
return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
}
/* Add an MFN override for a particular page */
static int m2p_add_override(unsigned long mfn, struct page *page,
struct gnttab_map_grant_ref *kmap_op)
{
unsigned long flags;
unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
if (WARN(ptep == NULL || level != PG_LEVEL_4K,
"m2p_add_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
if (kmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs =
xen_mc_entry(sizeof(*kmap_op));
MULTI_grant_table_op(mcs.mc,
GNTTABOP_map_grant_ref, kmap_op, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
}
spin_lock_irqsave(&m2p_override_lock, flags);
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
/* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
* this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
* pfn so that the following mfn_to_pfn(mfn) calls will return the
* pfn from the m2p_override (the backend pfn) instead.
* We need to do this because the pages shared by the frontend
* (xen-blkfront) can be already locked (lock_page, called by
* do_read_cache_page); when the userspace backend tries to use them
* with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
* do_blockdev_direct_IO is going to try to lock the same pages
* again resulting in a deadlock.
* As a side effect get_user_pages_fast might not be safe on the
* frontend pages while they are being shared with the backend,
* because mfn_to_pfn (that ends up being called by GUPF) will
* return the backend pfn rather than the frontend pfn. */
pfn = mfn_to_pfn_no_overrides(mfn);
if (__pfn_to_mfn(pfn) == mfn)
set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
return 0;
}
int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
struct gnttab_map_grant_ref *kmap_ops,
struct page **pages, unsigned int count)
{
int i, ret = 0;
bool lazy = false;
pte_t *pte;
if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
if (kmap_ops &&
!in_interrupt() &&
paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
arch_enter_lazy_mmu_mode();
lazy = true;
if (kmap_ops) {
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
kmap_ops, count);
if (ret)
goto out;
}
for (i = 0; i < count; i++) {
......@@ -773,160 +690,22 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
ret = -ENOMEM;
goto out;
}
if (kmap_ops) {
ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
if (ret)
goto out;
}
}
out:
if (lazy)
arch_leave_lazy_mmu_mode();
return ret;
}
EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
static struct page *m2p_find_override(unsigned long mfn)
{
unsigned long flags;
struct list_head *bucket;
struct page *p, *ret;
if (unlikely(!m2p_overrides))
return NULL;
ret = NULL;
bucket = &m2p_overrides[mfn_hash(mfn)];
spin_lock_irqsave(&m2p_override_lock, flags);
list_for_each_entry(p, bucket, lru) {
if (page_private(p) == mfn) {
ret = p;
break;
}
}
spin_unlock_irqrestore(&m2p_override_lock, flags);
return ret;
}
static int m2p_remove_override(struct page *page,
struct gnttab_unmap_grant_ref *kunmap_op,
unsigned long mfn)
{
unsigned long flags;
unsigned long pfn;
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
address = (unsigned long)__va(pfn << PAGE_SHIFT);
ptep = lookup_address(address, &level);
if (WARN(ptep == NULL || level != PG_LEVEL_4K,
"m2p_remove_override: pfn %lx not mapped", pfn))
return -EINVAL;
}
spin_lock_irqsave(&m2p_override_lock, flags);
list_del(&page->lru);
spin_unlock_irqrestore(&m2p_override_lock, flags);
if (kunmap_op != NULL) {
if (!PageHighMem(page)) {
struct multicall_space mcs;
struct gnttab_unmap_and_replace *unmap_op;
struct page *scratch_page = get_balloon_scratch_page();
unsigned long scratch_page_address = (unsigned long)
__va(page_to_pfn(scratch_page) << PAGE_SHIFT);
/*
* It might be that we queued all the m2p grant table
* hypercalls in a multicall, then m2p_remove_override
* get called before the multicall has actually been
* issued. In this case handle is going to -1 because
* it hasn't been modified yet.
*/
if (kunmap_op->handle == -1)
xen_mc_flush();
/*
* Now if kmap_op->handle is negative it means that the
* hypercall actually returned an error.
*/
if (kunmap_op->handle == GNTST_general_error) {
pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings",
pfn, mfn);
put_balloon_scratch_page();
return -1;
}
xen_mc_batch();
mcs = __xen_mc_entry(
sizeof(struct gnttab_unmap_and_replace));
unmap_op = mcs.args;
unmap_op->host_addr = kunmap_op->host_addr;
unmap_op->new_addr = scratch_page_address;
unmap_op->handle = kunmap_op->handle;
MULTI_grant_table_op(mcs.mc,
GNTTABOP_unmap_and_replace, unmap_op, 1);
mcs = __xen_mc_entry(0);
MULTI_update_va_mapping(mcs.mc, scratch_page_address,
pfn_pte(page_to_pfn(scratch_page),
PAGE_KERNEL_RO), 0);
xen_mc_issue(PARAVIRT_LAZY_MMU);
put_balloon_scratch_page();
}
}
/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
* somewhere in this domain, even before being added to the
* m2p_override (see comment above in m2p_add_override).
* If there are no other entries in the m2p_override corresponding
* to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
* the original pfn (the one shared by the frontend): the backend
* cannot do any IO on this page anymore because it has been
* unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
* the original pfn causes mfn_to_pfn(mfn) to return the frontend
* pfn again. */
mfn &= ~FOREIGN_FRAME_BIT;
pfn = mfn_to_pfn_no_overrides(mfn);
if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
m2p_find_override(mfn) == NULL)
set_phys_to_machine(pfn, mfn);
return 0;
}
int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
struct gnttab_unmap_grant_ref *kunmap_ops,
struct page **pages, unsigned int count)
{
int i, ret = 0;
bool lazy = false;
if (xen_feature(XENFEAT_auto_translated_physmap))
return 0;
if (kunmap_ops &&
!in_interrupt() &&
paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
arch_enter_lazy_mmu_mode();
lazy = true;
}
for (i = 0; i < count; i++) {
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);
......@@ -940,32 +719,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
WARN_ON(!PagePrivate(pages[i]));
ClearPagePrivate(pages[i]);
set_phys_to_machine(pfn, pages[i]->index);
if (kunmap_ops)
ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn);
if (ret)
goto out;
}
if (kunmap_ops)
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
kunmap_ops, count);
out:
if (lazy)
arch_leave_lazy_mmu_mode();
return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
{
struct page *p = m2p_find_override(mfn);
unsigned long ret = pfn;
if (p)
ret = page_to_pfn(p);
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
#ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>
#include "debugfs.h"
......
......@@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats);
/* We increase/decrease in batches which fit in a page */
static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
/* List of ballooned pages, threaded through the mem_map array. */
......@@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
page = pfn_to_page(pfn);
#ifdef CONFIG_XEN_HAVE_PVMMU
/*
* Ballooned out frames are effectively replaced with
* a scratch frame. Ensure direct mappings and the
* p2m are consistent.
*/
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
if (!PageHighMem(page)) {
struct page *scratch_page = get_balloon_scratch_page();
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
pfn_pte(page_to_pfn(scratch_page),
PAGE_KERNEL_RO), 0);
__pte_ma(0), 0);
BUG_ON(ret);
put_balloon_scratch_page();
}
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
......@@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work)
mutex_unlock(&balloon_mutex);
}
struct page *get_balloon_scratch_page(void)
{
struct page *ret = get_cpu_var(balloon_scratch_page);
BUG_ON(ret == NULL);
return ret;
}
void put_balloon_scratch_page(void)
{
put_cpu_var(balloon_scratch_page);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
......@@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn,
}
}
static int alloc_balloon_scratch_page(int cpu)
{
if (per_cpu(balloon_scratch_page, cpu) != NULL)
return 0;
per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
if (per_cpu(balloon_scratch_page, cpu) == NULL) {
pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
return -ENOMEM;
}
return 0;
}
static int balloon_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (long)hcpu;
switch (action) {
case CPU_UP_PREPARE:
if (alloc_balloon_scratch_page(cpu))
return NOTIFY_BAD;
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block balloon_cpu_notifier = {
.notifier_call = balloon_cpu_notify,
};
static int __init balloon_init(void)
{
int i, cpu;
int i;
if (!xen_domain())
return -ENODEV;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
register_cpu_notifier(&balloon_cpu_notifier);
get_online_cpus();
for_each_online_cpu(cpu) {
if (alloc_balloon_scratch_page(cpu)) {
put_online_cpus();
unregister_cpu_notifier(&balloon_cpu_notifier);
return -ENOMEM;
}
}
put_online_cpus();
}
pr_info("Initialising balloon driver\n");
balloon_stats.current_pages = xen_pv_domain()
......@@ -696,15 +625,4 @@ static int __init balloon_init(void)
subsys_initcall(balloon_init);
static int __init balloon_clear(void)
{
int cpu;
for_each_possible_cpu(cpu)
per_cpu(balloon_scratch_page, cpu) = NULL;
return 0;
}
early_initcall(balloon_clear);
MODULE_LICENSE("GPL");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment