Commit d62007ed authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Zap _all_ roots when unmapping gfn range in TDP MMU

Zap both valid and invalid roots when zapping/unmapping a gfn range, as
KVM must ensure it holds no references to the freed page after returning
from the unmap operation.  Most notably, the TDP MMU doesn't zap invalid
roots in mmu_notifier callbacks.  This leads to use-after-free and other
issues if the mmu_notifier runs to completion while an invalid root
zapper yields, as KVM fails to honor the requirement that there must be
_no_ references to the page after the mmu_notifier returns.
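For reference, the notifier-driven unmap path at the time of this patch looked
roughly like the sketch below (simplified, not the verbatim source; names and
signatures are as found in tdp_mmu.c of this era). The fix makes
for_each_tdp_mmu_root_yield_safe() visit invalid roots as well, so this path
drops every reference before the notifier returns:

  bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
                                   gfn_t end, bool can_yield, bool flush)
  {
          struct kvm_mmu_page *root;

          /* After this patch, also visits roots marked role.invalid. */
          for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false)
                  flush = zap_gfn_range(kvm, root, start, end, can_yield,
                                        flush, false);

          return flush;
  }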

The bug is most easily reproduced by hacking KVM to cause a collision
between set_nx_huge_pages() and kvm_mmu_notifier_release(), but the bug
exists between kvm_mmu_notifier_invalidate_range_start() and memslot
updates as well.  Invalidating a root ensures pages aren't accessible by
the guest, and KVM won't read or write page data itself, but KVM will
trigger e.g. kvm_set_pfn_dirty() when zapping SPTEs, and thus completing
a zap of an invalid root _after_ the mmu_notifier returns is fatal.

  WARNING: CPU: 24 PID: 1496 at arch/x86/kvm/../../../virt/kvm/kvm_main.c:173 [kvm]
  RIP: 0010:kvm_is_zone_device_pfn+0x96/0xa0 [kvm]
  Call Trace:
   <TASK>
   kvm_set_pfn_dirty+0xa8/0xe0 [kvm]
   __handle_changed_spte+0x2ab/0x5e0 [kvm]
   __handle_changed_spte+0x2ab/0x5e0 [kvm]
   __handle_changed_spte+0x2ab/0x5e0 [kvm]
   zap_gfn_range+0x1f3/0x310 [kvm]
   kvm_tdp_mmu_zap_invalidated_roots+0x50/0x90 [kvm]
   kvm_mmu_zap_all_fast+0x177/0x1a0 [kvm]
   set_nx_huge_pages+0xb4/0x190 [kvm]
   param_attr_store+0x70/0x100
   module_attr_store+0x19/0x30
   kernfs_fop_write_iter+0x119/0x1b0
   new_sync_write+0x11c/0x1b0
   vfs_write+0x1cc/0x270
   ksys_write+0x5f/0xe0
   do_syscall_64+0x38/0xc0
   entry_SYSCALL_64_after_hwframe+0x44/0xae
   </TASK>
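The kvm_set_pfn_dirty() in the trace is the A/D-bit writeback performed when a
dirty leaf SPTE is dropped; roughly (a simplified excerpt of
__handle_changed_spte() from this era, not verbatim):

  /* Zapping a dirty leaf SPTE propagates the dirty bit to the page. */
  if (was_leaf && is_dirty_spte(old_spte) && !is_dirty_spte(new_spte))
          kvm_set_pfn_dirty(spte_to_pfn(old_spte));

This is exactly the struct-page access that must not happen after the
mmu_notifier has returned and the page has been freed.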

Fixes: b7cccd39 ("KVM: x86/mmu: Fast invalidation for TDP MMU")
Cc: stable@vger.kernel.org
Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20211215011557.399940-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 04dc4e6c
arch/x86/kvm/mmu/tdp_mmu.c
@@ -99,15 +99,18 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Finds the next valid root after root (or the first valid root if root
- * is NULL), takes a reference on it, and returns that next root. If root
- * is not NULL, this thread should have already taken a reference on it, and
- * that reference will be dropped. If no valid root is found, this
- * function will return NULL.
+ * Returns the next root after @prev_root (or the first root if @prev_root is
+ * NULL).  A reference to the returned root is acquired, and the reference to
+ * @prev_root is released (the caller obviously must hold a reference to
+ * @prev_root if it's non-NULL).
+ *
+ * If @only_valid is true, invalid roots are skipped.
+ *
+ * Returns NULL if the end of tdp_mmu_roots was reached.
  */
 static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 					      struct kvm_mmu_page *prev_root,
-					      bool shared)
+					      bool shared, bool only_valid)
 {
 	struct kvm_mmu_page *next_root;
 
@@ -122,7 +125,7 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
 						   typeof(*next_root), link);
 
 	while (next_root) {
-		if (!next_root->role.invalid &&
+		if ((!only_valid || !next_root->role.invalid) &&
 		    kvm_tdp_mmu_get_root(kvm, next_root))
 			break;
 
@@ -148,13 +151,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
  * mode. In the unlikely event that this thread must free a root, the lock
  * will be temporarily dropped and reacquired in write mode.
  */
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)		\
-	for (_root = tdp_mmu_next_root(_kvm, NULL, _shared);			\
-	     _root;								\
-	     _root = tdp_mmu_next_root(_kvm, _root, _shared))			\
-		if (kvm_mmu_page_as_id(_root) != _as_id) {			\
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, _only_valid)\
+	for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, _only_valid);	\
+	     _root;								\
+	     _root = tdp_mmu_next_root(_kvm, _root, _shared, _only_valid))	\
+		if (kvm_mmu_page_as_id(_root) != _as_id) {			\
 		} else
 
+#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)	\
+	__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
+
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared)		\
+	__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, false)
+
 #define for_each_tdp_mmu_root(_kvm, _root, _as_id)				\
 	list_for_each_entry_rcu(_root, &_kvm->arch.tdp_mmu_roots, link,		\
 				lockdep_is_held_type(&kvm->mmu_lock, 0) ||	\
@@ -1224,7 +1233,7 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
 			     slot->base_gfn + slot->npages, min_level);
 
@@ -1294,7 +1303,7 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
 				  slot->base_gfn + slot->npages);
 
@@ -1419,7 +1428,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
+	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		zap_collapsible_spte_range(kvm, root, slot);
 }
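A note on the macro construction: the empty "if (...) { } else" tail is what
lets the iterator filter by address-space id while still behaving as a single
statement in the caller (no dangling-else hazard). Below is a minimal,
self-contained illustration of the same pattern, using hypothetical
node/next_node stand-ins rather than actual KVM code:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the roots being iterated. */
struct node {
	int as_id;
	bool invalid;
	struct node *next;
};

static struct node *next_node(struct node *n, bool only_valid)
{
	/* Mirrors tdp_mmu_next_root(): optionally skip invalid entries. */
	while (n && only_valid && n->invalid)
		n = n->next;
	return n;
}

/*
 * Same shape as __for_each_tdp_mmu_root_yield_safe(): the trailing empty
 * "if { } else" filters out non-matching entries while leaving the macro
 * usable as a normal single-statement loop header.
 */
#define __for_each_node(_n, _head, _as_id, _only_valid)			\
	for (_n = next_node(_head, _only_valid);			\
	     _n;							\
	     _n = next_node((_n)->next, _only_valid))			\
		if ((_n)->as_id != (_as_id)) {				\
		} else

#define for_each_valid_node(_n, _head, _as_id)	\
	__for_each_node(_n, _head, _as_id, true)

#define for_each_node(_n, _head, _as_id)	\
	__for_each_node(_n, _head, _as_id, false)

int main(void)
{
	struct node c = { .as_id = 0, .invalid = false, .next = NULL };
	struct node b = { .as_id = 1, .invalid = false, .next = &c };
	struct node a = { .as_id = 0, .invalid = true,  .next = &b };
	struct node *n;

	for_each_node(n, &a, 0)		/* visits a and c (b filtered by as_id) */
		printf("all:   invalid=%d\n", n->invalid);

	for_each_valid_node(n, &a, 0)	/* skips invalid a, visits only c */
		printf("valid: invalid=%d\n", n->invalid);

	return 0;
}

The two named wrappers mirror for_each_tdp_mmu_root_yield_safe() and
for_each_valid_tdp_mmu_root_yield_safe() after this patch: identical
iteration, differing only in whether invalid entries are skipped.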