Commit 0acc7239 authored by Oliver Upton, committed by Marc Zyngier

KVM: arm64: Avoid vcpu->mutex v. kvm->lock inversion in CPU_ON

KVM/arm64 had the lock ordering backwards on vcpu->mutex and kvm->lock
from the very beginning. One such example is the way vCPU resets are
handled: the kvm->lock is acquired while handling a guest CPU_ON PSCI
call.
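
The problematic nesting is easiest to see side by side. A simplified
sketch follows (illustrative only, not the literal call chains;
lock_all_vcpus() is one example of a path that nests vcpu->mutex inside
kvm->lock):

	/* Path A: vCPU ioctl servicing a guest PSCI CPU_ON call */
	mutex_lock(&vcpu->mutex);	/* taken by generic KVM ioctl code */
	...
	mutex_lock(&kvm->lock);		/* CPU_ON handler, before this patch */

	/* Path B: e.g. vGIC initialization */
	mutex_lock(&kvm->lock);
	...
	mutex_lock(&vcpu->mutex);	/* lock_all_vcpus() */

Run concurrently, A and B form the classic AB-BA deadlock.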

Add a dedicated lock to serialize writes to kvm_vcpu_arch::{mp_state,
reset_state}. Promote all accessors of mp_state to {READ,WRITE}_ONCE()
as readers do not acquire the mp_state_lock. While at it, plug yet
another race by taking the mp_state_lock in the KVM_SET_MP_STATE ioctl
handler.
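
The result is the usual locked-writer/lockless-reader idiom; roughly (a
sketch using the names this patch introduces, mirroring the hunks
below):

	/* writer: serialized against other writers by the new lock */
	spin_lock(&vcpu->arch.mp_state_lock);
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	spin_unlock(&vcpu->arch.mp_state_lock);

	/* reader: lockless, so the load must be single and untorn */
	if (READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED)
		...

Note that {READ,WRITE}_ONCE() add no memory ordering here; they only
keep the compiler from tearing or fusing the accesses.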

As changes to MP state are now guarded with a dedicated lock, drop the
kvm->lock acquisition from the PSCI CPU_ON path. Similarly, move the
reader of reset_state outside of the kvm->lock and instead protect it
with the mp_state_lock. Note that writes to reset_state::reset have been
demoted to regular stores as both readers and writers acquire the
mp_state_lock.
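
Schematically, both sides of the reset_state handoff now look like this
(simplified from the CPU_ON and kvm_reset_vcpu() hunks below):

	/* PSCI CPU_ON (producer) */
	spin_lock(&vcpu->arch.mp_state_lock);
	vcpu->arch.reset_state.reset = true;	/* plain store, lock held */
	kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);

	/* kvm_reset_vcpu() (consumer) */
	spin_lock(&vcpu->arch.mp_state_lock);
	reset_state = vcpu->arch.reset_state;	/* snapshot under the lock */
	vcpu->arch.reset_state.reset = false;
	spin_unlock(&vcpu->arch.mp_state_lock);

The spinlock's acquire/release semantics already order these accesses,
which is what makes the _ONCE() annotations on reset_state::reset
unnecessary.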

While the kvm->lock inversion still exists in kvm_reset_vcpu(), at least
now PSCI CPU_ON no longer depends on it for serializing vCPU reset.

Cc: stable@vger.kernel.org
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230327164747.2466958-2-oliver.upton@linux.dev
parent 197b6b60
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -522,6 +522,7 @@ struct kvm_vcpu_arch {
 
 	/* vcpu power state */
 	struct kvm_mp_state mp_state;
+	spinlock_t mp_state_lock;
 
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -326,6 +326,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	int err;
 
+	spin_lock_init(&vcpu->arch.mp_state_lock);
+
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
@@ -443,34 +445,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 }
 
-void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
 	kvm_vcpu_kick(vcpu);
 }
 
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_arm_vcpu_power_off(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
 bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
 }
 
 static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
 	kvm_make_request(KVM_REQ_SUSPEND, vcpu);
 	kvm_vcpu_kick(vcpu);
 }
 
 static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	*mp_state = vcpu->arch.mp_state;
+	*mp_state = READ_ONCE(vcpu->arch.mp_state);
 
 	return 0;
 }
@@ -480,12 +489,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	int ret = 0;
 
+	spin_lock(&vcpu->arch.mp_state_lock);
+
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.mp_state = *mp_state;
+		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
 		break;
 	case KVM_MP_STATE_STOPPED:
-		kvm_arm_vcpu_power_off(vcpu);
+		__kvm_arm_vcpu_power_off(vcpu);
 		break;
 	case KVM_MP_STATE_SUSPENDED:
 		kvm_arm_vcpu_suspend(vcpu);
@@ -494,6 +505,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 		ret = -EINVAL;
 	}
 
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
 	return ret;
 }
@@ -1213,7 +1226,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		kvm_arm_vcpu_power_off(vcpu);
 	else
-		vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+		WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
 
 	return 0;
 }
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct vcpu_reset_state *reset_state;
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm_vcpu *vcpu = NULL;
+	int ret = PSCI_RET_SUCCESS;
 	unsigned long cpu_id;
 
 	cpu_id = smccc_get_arg1(source_vcpu);
@@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
+
+	spin_lock(&vcpu->arch.mp_state_lock);
 	if (!kvm_arm_vcpu_stopped(vcpu)) {
 		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
-			return PSCI_RET_ALREADY_ON;
+			ret = PSCI_RET_ALREADY_ON;
 		else
-			return PSCI_RET_INVALID_PARAMS;
+			ret = PSCI_RET_INVALID_PARAMS;
+
+		goto out_unlock;
 	}
 
 	reset_state = &vcpu->arch.reset_state;
@@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	reset_state->r0 = smccc_get_arg3(source_vcpu);
 
-	WRITE_ONCE(reset_state->reset, true);
+	reset_state->reset = true;
 	kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
 
 	/*
@@ -108,7 +113,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
 	kvm_vcpu_wake_up(vcpu);
 
-	return PSCI_RET_SUCCESS;
+out_unlock:
+	spin_unlock(&vcpu->arch.mp_state_lock);
+	return ret;
 }
 
 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
@@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
 	 * after this call is handled and before the VCPUs have been
 	 * re-initialized.
 	 */
-	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
-		tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		spin_lock(&tmp->arch.mp_state_lock);
+		WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+		spin_unlock(&tmp->arch.mp_state_lock);
+	}
 	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
 
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
 
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
 	u32 psci_fn = smccc_get_function(vcpu);
 	unsigned long val;
 	int ret = 1;
@@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		kvm_psci_narrow_to_32bit(vcpu);
 		fallthrough;
 	case PSCI_0_2_FN64_CPU_ON:
-		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
 		break;
 	case PSCI_0_2_FN_AFFINITY_INFO:
 		kvm_psci_narrow_to_32bit(vcpu);
@@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
 	u32 psci_fn = smccc_get_function(vcpu);
 	unsigned long val;
@@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_SUCCESS;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
-		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
 		break;
 	default:
 		val = PSCI_RET_NOT_SUPPORTED;
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -264,15 +264,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	mutex_lock(&vcpu->kvm->lock);
 	ret = kvm_set_vm_width(vcpu);
-	if (!ret) {
-		reset_state = vcpu->arch.reset_state;
-		WRITE_ONCE(vcpu->arch.reset_state.reset, false);
-	}
 	mutex_unlock(&vcpu->kvm->lock);
 
 	if (ret)
 		return ret;
 
+	spin_lock(&vcpu->arch.mp_state_lock);
+	reset_state = vcpu->arch.reset_state;
+	vcpu->arch.reset_state.reset = false;
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
 	/* Reset PMU outside of the non-preemptible section */
 	kvm_pmu_vcpu_reset(vcpu);