Commit 73232603 authored by Paolo Bonzini

Merge branch 'kvm-coco-hooks' into HEAD

Common patches for the target-independent functionality and hooks
that are needed by SEV-SNP and TDX.
parents 7d41e24d f32fb328
...@@ -139,6 +139,9 @@ KVM_X86_OP(vcpu_deliver_sipi_vector) ...@@ -139,6 +139,9 @@ KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
KVM_X86_OP_OPTIONAL(gmem_invalidate)
#undef KVM_X86_OP #undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL #undef KVM_X86_OP_OPTIONAL
......
...@@ -1812,6 +1812,9 @@ struct kvm_x86_ops { ...@@ -1812,6 +1812,9 @@ struct kvm_x86_ops {
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
}; };
struct kvm_x86_nested_ops { struct kvm_x86_nested_ops {
......
...@@ -4280,6 +4280,25 @@ static inline u8 kvm_max_level_for_order(int order) ...@@ -4280,6 +4280,25 @@ static inline u8 kvm_max_level_for_order(int order)
return PG_LEVEL_4K; return PG_LEVEL_4K;
} }
/*
 * Compute the largest mapping level that may be used for a private page.
 *
 * @kvm:        VM the fault belongs to; forwarded to the vendor hook.
 * @pfn:        host PFN backing the fault; forwarded to the vendor hook.
 * @max_level:  upper bound supplied by the caller.
 * @gmem_order: order of the backing allocation as reported by guest_memfd.
 *
 * The result is the minimum of @max_level, the level permitted by
 * @gmem_order and, if the private_max_mapping_level hook returns a non-zero
 * value, that value.  A zero return from the hook leaves the limit untouched.
 */
static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
					u8 max_level, int gmem_order)
{
	u8 req_max_level;

	/* Nothing can lower the limit below 4K, so skip the remaining work. */
	if (max_level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

	max_level = min(kvm_max_level_for_order(gmem_order), max_level);
	if (max_level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

	req_max_level = static_call(kvm_x86_private_max_mapping_level)(kvm, pfn);
	if (req_max_level)
		max_level = min(max_level, req_max_level);

	/*
	 * Return the clamped level, not the raw hook result: the hook may
	 * return 0 (no restriction) or a level above the limits applied above.
	 */
	return max_level;
}
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault) struct kvm_page_fault *fault)
{ {
...@@ -4297,9 +4316,9 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, ...@@ -4297,9 +4316,9 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
return r; return r;
} }
fault->max_level = min(kvm_max_level_for_order(max_order),
fault->max_level);
fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY); fault->map_writable = !(fault->slot->flags & KVM_MEM_READONLY);
fault->max_level = kvm_max_private_mapping_level(vcpu->kvm, fault->pfn,
fault->max_level, max_order);
return RET_PF_CONTINUE; return RET_PF_CONTINUE;
} }
......
...@@ -13599,6 +13599,19 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) ...@@ -13599,6 +13599,19 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
} }
EXPORT_SYMBOL_GPL(kvm_arch_no_poll); EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
/*
 * x86 implementation of kvm_arch_gmem_prepare(): forwards the request to the
 * gmem_prepare callback in kvm_x86_ops and returns its result.
 *
 * Note the argument order: this function takes (gfn, pfn) while the
 * kvm_x86_ops callback is declared as (pfn, gfn), so the two are swapped in
 * the call below.
 *
 * NOTE(review): gmem_prepare is declared with KVM_X86_OP_OPTIONAL_RET0, so
 * this presumably evaluates to 0 when no vendor callback is installed.
 */
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order);
}
#endif
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
/*
 * x86 implementation of kvm_arch_gmem_invalidate(): passes the PFN range
 * @start..@end to the gmem_invalidate callback in kvm_x86_ops.
 *
 * gmem_invalidate is declared with KVM_X86_OP_OPTIONAL, hence the conditional
 * static call.
 */
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
{
static_call_cond(kvm_x86_gmem_invalidate)(start, end);
}
#endif
int kvm_spec_ctrl_test_value(u64 value) int kvm_spec_ctrl_test_value(u64 value)
{ {
......
...@@ -2441,4 +2441,40 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, ...@@ -2441,4 +2441,40 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
} }
#endif /* CONFIG_KVM_PRIVATE_MEM */ #endif /* CONFIG_KVM_PRIVATE_MEM */
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
#endif
/**
* kvm_gmem_populate() - Populate/prepare a GPA range with guest data
*
* @kvm: KVM instance
* @gfn: starting GFN to be populated
* @src: userspace-provided buffer containing data to copy into GFN range
* (passed to @post_populate, and incremented on each iteration
* if not NULL)
* @npages: number of pages to copy from userspace-buffer
* @post_populate: callback to issue for each gmem page that backs the GPA
* range
* @opaque: opaque data to pass to @post_populate callback
*
* This is primarily intended for cases where a gmem-backed GPA range needs
* to be initialized with userspace-provided data prior to being mapped into
* the guest as a private page. This should be called with the slots->lock
* held so that caller-enforced invariants regarding the expected memory
* attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES.
*
* Returns the number of pages that were populated; a negative error code is
* returned only if a failure occurs before any page has been populated.
*/
typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
void __user *src, int order, void *opaque);
long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque);
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
#endif
#endif #endif
...@@ -207,6 +207,7 @@ enum mapping_flags { ...@@ -207,6 +207,7 @@ enum mapping_flags {
AS_STABLE_WRITES, /* must wait for writeback before modifying AS_STABLE_WRITES, /* must wait for writeback before modifying
folio contents */ folio contents */
AS_UNMOVABLE, /* The mapping cannot be moved, ever */ AS_UNMOVABLE, /* The mapping cannot be moved, ever */
AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */
}; };
/** /**
......
...@@ -233,6 +233,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) ...@@ -233,6 +233,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end)
* doing a complex calculation here, and then doing the zeroing * doing a complex calculation here, and then doing the zeroing
* anyway if the page split fails. * anyway if the page split fails.
*/ */
if (!(folio->mapping->flags & AS_INACCESSIBLE))
folio_zero_range(folio, offset, length); folio_zero_range(folio, offset, length);
if (folio_has_private(folio)) if (folio_has_private(folio))
......
...@@ -109,3 +109,11 @@ config KVM_GENERIC_PRIVATE_MEM ...@@ -109,3 +109,11 @@ config KVM_GENERIC_PRIVATE_MEM
select KVM_GENERIC_MEMORY_ATTRIBUTES select KVM_GENERIC_MEMORY_ATTRIBUTES
select KVM_PRIVATE_MEM select KVM_PRIVATE_MEM
bool bool
config HAVE_KVM_GMEM_PREPARE
bool
depends on KVM_PRIVATE_MEM
config HAVE_KVM_GMEM_INVALIDATE
bool
depends on KVM_PRIVATE_MEM
...@@ -13,14 +13,50 @@ struct kvm_gmem { ...@@ -13,14 +13,50 @@ struct kvm_gmem {
struct list_head entry; struct list_head entry;
}; };
/*
 * kvm_gmem_prepare_folio() - run the arch "prepare" hook for one file index.
 *
 * @inode: guest_memfd inode whose mapping holds the list of kvm_gmem instances
 * @index: page offset within the file
 * @folio: folio that contains @index
 *
 * Returns 0 on success, or the first non-zero value returned by
 * kvm_arch_gmem_prepare().  Without CONFIG_HAVE_KVM_GMEM_PREPARE the function
 * does nothing and returns 0.
 */
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
{
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
struct list_head *gmem_list = &inode->i_mapping->i_private_list;
struct kvm_gmem *gmem;
/* Visit every kvm_gmem instance linked on this mapping's private list. */
list_for_each_entry(gmem, gmem_list, entry) {
struct kvm_memory_slot *slot;
struct kvm *kvm = gmem->kvm;
struct page *page;
kvm_pfn_t pfn;
gfn_t gfn;
int rc;
/* Skip VMs for which the architecture reports no preparation is needed. */
if (!kvm_arch_gmem_prepare_needed(kvm))
continue;
/* No memslot is bound to this file offset for this instance. */
slot = xa_load(&gmem->bindings, index);
if (!slot)
continue;
page = folio_file_page(folio, index);
pfn = page_to_pfn(page);
/* Translate the file offset back into a GFN within the bound slot. */
gfn = slot->base_gfn + index - slot->gmem.pgoff;
rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
if (rc) {
/* Stop at the first failure; remaining instances are not visited. */
pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n",
index, rc);
return rc;
}
}
#endif
return 0;
}
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
{ {
struct folio *folio; struct folio *folio;
/* TODO: Support huge pages. */ /* TODO: Support huge pages. */
folio = filemap_grab_folio(inode->i_mapping, index); folio = filemap_grab_folio(inode->i_mapping, index);
if (IS_ERR_OR_NULL(folio)) if (IS_ERR(folio))
return NULL; return folio;
/* /*
* Use the up-to-date flag to track whether or not the memory has been * Use the up-to-date flag to track whether or not the memory has been
...@@ -41,6 +77,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) ...@@ -41,6 +77,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
folio_mark_uptodate(folio); folio_mark_uptodate(folio);
} }
if (prepare) {
int r = kvm_gmem_prepare_folio(inode, index, folio);
if (r < 0) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(r);
}
}
/* /*
* Ignore accessed, referenced, and dirty flags. The memory is * Ignore accessed, referenced, and dirty flags. The memory is
* unevictable and there is no storage to write back to. * unevictable and there is no storage to write back to.
...@@ -145,9 +190,9 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) ...@@ -145,9 +190,9 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
break; break;
} }
folio = kvm_gmem_get_folio(inode, index); folio = kvm_gmem_get_folio(inode, index, true);
if (!folio) { if (IS_ERR(folio)) {
r = -ENOMEM; r = PTR_ERR(folio);
break; break;
} }
...@@ -298,10 +343,24 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol ...@@ -298,10 +343,24 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
return MF_DELAYED; return MF_DELAYED;
} }
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
/*
 * .free_folio callback for guest_memfd mappings: tells the architecture code
 * to invalidate every PFN covered by @folio.  The range handed over starts at
 * the folio's first page and spans (1 << folio order) PFNs, with the end
 * value being one past the last PFN.
 */
static void kvm_gmem_free_folio(struct folio *folio)
{
	kvm_pfn_t first_pfn = page_to_pfn(folio_page(folio, 0));
	kvm_pfn_t end_pfn = first_pfn + (1ul << folio_order(folio));

	kvm_arch_gmem_invalidate(first_pfn, end_pfn);
}
#endif
static const struct address_space_operations kvm_gmem_aops = { static const struct address_space_operations kvm_gmem_aops = {
.dirty_folio = noop_dirty_folio, .dirty_folio = noop_dirty_folio,
.migrate_folio = kvm_gmem_migrate_folio, .migrate_folio = kvm_gmem_migrate_folio,
.error_remove_folio = kvm_gmem_error_folio, .error_remove_folio = kvm_gmem_error_folio,
#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
.free_folio = kvm_gmem_free_folio,
#endif
}; };
static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path, static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path,
...@@ -357,6 +416,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) ...@@ -357,6 +416,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
inode->i_private = (void *)(unsigned long)flags; inode->i_private = (void *)(unsigned long)flags;
inode->i_op = &kvm_gmem_iops; inode->i_op = &kvm_gmem_iops;
inode->i_mapping->a_ops = &kvm_gmem_aops; inode->i_mapping->a_ops = &kvm_gmem_aops;
inode->i_mapping->flags |= AS_INACCESSIBLE;
inode->i_mode |= S_IFREG; inode->i_mode |= S_IFREG;
inode->i_size = size; inode->i_size = size;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
...@@ -482,32 +542,29 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) ...@@ -482,32 +542,29 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
fput(file); fput(file);
} }
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, int *max_order) gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
{ {
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
struct kvm_gmem *gmem; struct kvm_gmem *gmem = file->private_data;
struct folio *folio; struct folio *folio;
struct page *page; struct page *page;
struct file *file;
int r; int r;
file = kvm_gmem_get_file(slot); if (file != slot->gmem.file) {
if (!file) WARN_ON_ONCE(slot->gmem.file);
return -EFAULT; return -EFAULT;
}
gmem = file->private_data; gmem = file->private_data;
if (xa_load(&gmem->bindings, index) != slot) {
if (WARN_ON_ONCE(xa_load(&gmem->bindings, index) != slot)) { WARN_ON_ONCE(xa_load(&gmem->bindings, index));
r = -EIO; return -EIO;
goto out_fput;
} }
folio = kvm_gmem_get_folio(file_inode(file), index); folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
if (!folio) { if (IS_ERR(folio))
r = -ENOMEM; return PTR_ERR(folio);
goto out_fput;
}
if (folio_test_hwpoison(folio)) { if (folio_test_hwpoison(folio)) {
r = -EHWPOISON; r = -EHWPOISON;
...@@ -524,9 +581,73 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, ...@@ -524,9 +581,73 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
out_unlock: out_unlock:
folio_unlock(folio); folio_unlock(folio);
out_fput:
fput(file);
return r; return r;
} }
/*
 * Look up the PFN backing @gfn in @slot's guest_memfd file.
 *
 * @kvm:       not referenced by this wrapper
 * @slot:      memslot whose guest_memfd file is consulted
 * @gfn:       guest frame number to resolve
 * @pfn:       output, forwarded to __kvm_gmem_get_pfn()
 * @max_order: output, forwarded to __kvm_gmem_get_pfn()
 *
 * The file obtained from kvm_gmem_get_file() is released with fput() before
 * returning.  The lookup is done with prepare == true.
 *
 * Returns -EFAULT if the slot has no file, otherwise the result of
 * __kvm_gmem_get_pfn().
 */
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
{
	struct file *gmem_file;
	int ret = -EFAULT;

	gmem_file = kvm_gmem_get_file(slot);
	if (gmem_file) {
		ret = __kvm_gmem_get_pfn(gmem_file, slot, gfn, pfn, max_order, true);
		fput(gmem_file);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
/*
 * Walk up to @npages GFNs starting at @start_gfn, fetch the backing gmem PFN
 * for each step (without the arch prepare hook) and hand it to
 * @post_populate.
 *
 * The caller must hold kvm->slots_lock.  The range is clipped to the end of
 * the memslot containing @start_gfn, and the file's mapping invalidate lock
 * is held across the whole walk.
 *
 * Returns -EINVAL for a negative @npages or a slot that cannot be private,
 * -EFAULT if the slot has no gmem file, the callback/lookup error if the
 * very first step fails, and otherwise the number of pages stepped over.
 */
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
		       kvm_gmem_populate_cb post_populate, void *opaque)
{
	struct kvm_memory_slot *slot;
	struct file *file;
	int max_order;
	int ret = 0;
	long i;

	lockdep_assert_held(&kvm->slots_lock);

	if (npages < 0)
		return -EINVAL;

	slot = gfn_to_memslot(kvm, start_gfn);
	if (!kvm_slot_can_be_private(slot))
		return -EINVAL;

	file = kvm_gmem_get_file(slot);
	if (!file)
		return -EFAULT;

	filemap_invalidate_lock(file->f_mapping);

	/* Do not run past the last page of the memslot. */
	npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);

	for (i = 0; i < npages; i += (1 << max_order)) {
		gfn_t gfn = start_gfn + i;
		void __user *p = NULL;
		kvm_pfn_t pfn;

		ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
		if (ret)
			break;

		/*
		 * Fall back to a single page when the GFN is not aligned to
		 * the reported order or fewer pages than that remain.
		 */
		if (!IS_ALIGNED(gfn, (1 << max_order)) ||
		    (npages - i) < (1 << max_order))
			max_order = 0;

		/* A NULL @src stays NULL; otherwise advance in step with the GFN. */
		if (src)
			p = src + i * PAGE_SIZE;

		ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);

		/* Drop the page reference before acting on the callback result. */
		put_page(pfn_to_page(pfn));
		if (ret)
			break;
	}

	filemap_invalidate_unlock(file->f_mapping);
	fput(file);

	/* Report the error only if nothing at all was processed. */
	if (ret && !i)
		return ret;

	return i;
}
EXPORT_SYMBOL_GPL(kvm_gmem_populate);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment