Commit 8d25b7be authored by Paolo Bonzini's avatar Paolo Bonzini

KVM: x86: pull kvm->srcu read-side to kvm_arch_vcpu_ioctl_run

kvm_arch_vcpu_ioctl_run is already doing srcu_read_lock/unlock in two
places, namely vcpu_run and post_kvm_run_save, and a third is actually
needed around the call to vcpu->arch.complete_userspace_io to avoid
the following splat:

  WARNING: suspicious RCU usage
  arch/x86/kvm/pmu.c:190 suspicious rcu_dereference_check() usage!
  other info that might help us debug this:
  rcu_scheduler_active = 2, debug_locks = 1
  1 lock held by CPU 28/KVM/370841:
  #0: ff11004089f280b8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x87/0x730 [kvm]
  Call Trace:
   <TASK>
   dump_stack_lvl+0x59/0x73
   reprogram_fixed_counter+0x15d/0x1a0 [kvm]
   kvm_pmu_trigger_event+0x1a3/0x260 [kvm]
   ? free_moved_vector+0x1b4/0x1e0
   complete_fast_pio_in+0x8a/0xd0 [kvm]

This splat is not at all unexpected, since complete_userspace_io callbacks
can execute similar code to vmexits.  For example, SVM with nrips=false
will call into the emulator from svm_skip_emulated_instruction().

While it's tempting to never acquire kvm->srcu for an uninitialized vCPU,
practically speaking there's no penalty to acquiring kvm->srcu "early"
as the KVM_MP_STATE_UNINITIALIZED path is a one-time thing per vCPU.  On
the other hand, seemingly innocuous helpers like kvm_apic_accept_events()
and sync_regs() can theoretically reach code that might access
SRCU-protected data structures, e.g. sync_regs() can trigger forced
existing of nested mode via kvm_vcpu_ioctl_x86_set_vcpu_events().
Reported-by: default avatarLike Xu <likexu@tencent.com>
Co-developed-by: default avatarSean Christopherson <seanjc@google.com>
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent c6c937d6
...@@ -9180,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) ...@@ -9180,6 +9180,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
likely(!pic_in_kernel(vcpu->kvm)); likely(!pic_in_kernel(vcpu->kvm));
} }
/* Called within kvm->srcu read side. */
static void post_kvm_run_save(struct kvm_vcpu *vcpu) static void post_kvm_run_save(struct kvm_vcpu *vcpu)
{ {
struct kvm_run *kvm_run = vcpu->run; struct kvm_run *kvm_run = vcpu->run;
...@@ -9188,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) ...@@ -9188,16 +9189,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu);
/*
* The call to kvm_ready_for_interrupt_injection() may end up in
* kvm_xen_has_interrupt() which may require the srcu lock to be
* held, to protect against changes in the vcpu_info address.
*/
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) || pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu); kvm_vcpu_ready_for_interrupt_injection(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu)) if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM; kvm_run->flags |= KVM_RUN_X86_SMM;
...@@ -9815,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) ...@@ -9815,6 +9809,7 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
/* /*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without * Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the * exiting to the userspace. Otherwise, the value will be returned to the
* userspace. * userspace.
...@@ -10193,6 +10188,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) ...@@ -10193,6 +10188,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
return r; return r;
} }
/* Called within kvm->srcu read side. */
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{ {
bool hv_timer; bool hv_timer;
...@@ -10252,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) ...@@ -10252,12 +10248,12 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
!vcpu->arch.apf.halted); !vcpu->arch.apf.halted);
} }
/* Called within kvm->srcu read side. */
static int vcpu_run(struct kvm_vcpu *vcpu) static int vcpu_run(struct kvm_vcpu *vcpu)
{ {
int r; int r;
struct kvm *kvm = vcpu->kvm; struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
vcpu->arch.l1tf_flush_l1d = true; vcpu->arch.l1tf_flush_l1d = true;
for (;;) { for (;;) {
...@@ -10285,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) ...@@ -10285,14 +10281,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (__xfer_to_guest_mode_work_pending()) { if (__xfer_to_guest_mode_work_pending()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
r = xfer_to_guest_mode_handle_work(vcpu); r = xfer_to_guest_mode_handle_work(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (r) if (r)
return r; return r;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
} }
} }
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
return r; return r;
} }
...@@ -10398,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -10398,6 +10392,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{ {
struct kvm_run *kvm_run = vcpu->run; struct kvm_run *kvm_run = vcpu->run;
struct kvm *kvm = vcpu->kvm;
int r; int r;
vcpu_load(vcpu); vcpu_load(vcpu);
...@@ -10405,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -10405,6 +10400,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_run->flags = 0; kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu); kvm_load_guest_fpu(vcpu);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) { if (kvm_run->immediate_exit) {
r = -EINTR; r = -EINTR;
...@@ -10415,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -10415,7 +10411,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* use before KVM has ever run the vCPU. * use before KVM has ever run the vCPU.
*/ */
WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu); kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_apic_accept_events(vcpu) < 0) { if (kvm_apic_accept_events(vcpu) < 0) {
r = 0; r = 0;
goto out; goto out;
...@@ -10475,8 +10475,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) ...@@ -10475,8 +10475,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (kvm_run->kvm_valid_regs) if (kvm_run->kvm_valid_regs)
store_regs(vcpu); store_regs(vcpu);
post_kvm_run_save(vcpu); post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu); srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_sigset_deactivate(vcpu);
vcpu_put(vcpu); vcpu_put(vcpu);
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment