Commit 2121cade authored by Yong-Xuan Wang, committed by Anup Patel

RISCV: KVM: Introduce mp_state_lock to avoid lock inversion

Documentation/virt/kvm/locking.rst advises that kvm->lock should be
acquired outside vcpu->mutex and kvm->srcu. However, when KVM/RISC-V
handles SBI_EXT_HSM_HART_START, the locks are acquired in the order
vcpu->mutex, kvm->srcu, then kvm->lock.

Although lockdep no longer complains about this after commit
f0f44752 ("rcu: Annotate SRCU's update-side lockdep dependencies"),
it is still necessary to replace kvm->lock with a new dedicated lock so
that only one hart at a time can execute an SBI_EXT_HSM_HART_START call
for a given target hart.

Additionally, this patch renames "power_off" to "mp_state", which has
two possible values. The new vcpu->mp_state_lock also protects access
to vcpu->mp_state.
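
To make the inversion concrete, the following user-space toy models the
two orderings (a hedged sketch using pthreads; all names are illustrative,
lock initialization is elided, and none of this is kernel code):

  #include <pthread.h>

  /* Toy rule, per locking.rst: kvm_lock is taken before vcpu_mutex. */
  static pthread_mutex_t kvm_lock = PTHREAD_MUTEX_INITIALIZER;

  struct toy_vcpu {
  	pthread_mutex_t vcpu_mutex;
  	pthread_mutex_t mp_state_lock;	/* the new innermost lock */
  	int mp_state;
  };

  /* Old HSM path: vcpu_mutex is already held by the run loop when the
   * SBI call arrives, so taking kvm_lock underneath inverts the rule. */
  static void hart_start_old(struct toy_vcpu *self, struct toy_vcpu *target)
  {
  	pthread_mutex_lock(&self->vcpu_mutex);	/* held by KVM_RUN */
  	pthread_mutex_lock(&kvm_lock);		/* inversion! */
  	target->mp_state = 0;			/* RUNNABLE */
  	pthread_mutex_unlock(&kvm_lock);
  	pthread_mutex_unlock(&self->vcpu_mutex);
  }

  /* New HSM path: only the target's innermost mp_state_lock is taken,
   * so the documented order is never violated. */
  static void hart_start_new(struct toy_vcpu *self, struct toy_vcpu *target)
  {
  	pthread_mutex_lock(&self->vcpu_mutex);	/* held by KVM_RUN */
  	pthread_mutex_lock(&target->mp_state_lock);
  	target->mp_state = 0;			/* RUNNABLE */
  	pthread_mutex_unlock(&target->mp_state_lock);
  	pthread_mutex_unlock(&self->vcpu_mutex);
  }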
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20240417074528.16506-2-yongxuan.wang@sifive.com
Signed-off-by: Anup Patel <anup@brainfault.org>
parent f1c48c1e
arch/riscv/include/asm/kvm_host.h

@@ -264,8 +264,9 @@ struct kvm_vcpu_arch {
 	/* Cache pages needed to program page tables with spinlock held */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
-	/* VCPU power-off state */
-	bool power_off;
+	/* VCPU power state */
+	struct kvm_mp_state mp_state;
+	spinlock_t mp_state_lock;
 
 	/* Don't run the VCPU (blocked) */
 	bool pause;
@@ -386,8 +387,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
 bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);
 
 void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
 void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
arch/riscv/kvm/vcpu.c

@@ -102,6 +102,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	struct kvm_cpu_context *cntx;
 	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
 
+	spin_lock_init(&vcpu->arch.mp_state_lock);
+
 	/* Mark this VCPU never ran */
 	vcpu->arch.ran_atleast_once = false;
 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -201,7 +203,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
-		!vcpu->arch.power_off && !vcpu->arch.pause);
+		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
 }
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -429,26 +431,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
 	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
 }
 
-void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.power_off = true;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
 	kvm_vcpu_kick(vcpu);
 }
 
-void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.power_off = false;
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_riscv_vcpu_power_off(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
 	kvm_vcpu_wake_up(vcpu);
 }
 
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_riscv_vcpu_power_on(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	if (vcpu->arch.power_off)
-		mp_state->mp_state = KVM_MP_STATE_STOPPED;
-	else
-		mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+	*mp_state = READ_ONCE(vcpu->arch.mp_state);
 
 	return 0;
 }
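
The helpers above follow the kernel's usual locked/unlocked naming
convention: the double-underscore variants assume mp_state_lock is
already held, while the plain variants acquire and release it. A
condensed sketch of how a caller picks between them (both calling
functions are invented for illustration, and the error code is not
SBI's):

  /* Invented caller with no mp_state_lock held: use the wrapper. */
  static void example_stop_unlocked(struct kvm_vcpu *vcpu)
  {
  	kvm_riscv_vcpu_power_off(vcpu);
  }

  /* Invented caller that must test-and-set atomically: hold the lock
   * across the check and call the __ variant inside it. */
  static int example_stop_if_running(struct kvm_vcpu *vcpu)
  {
  	int ret = 0;

  	spin_lock(&vcpu->arch.mp_state_lock);
  	if (kvm_riscv_vcpu_stopped(vcpu))
  		ret = -EBUSY;		/* illustrative error code */
  	else
  		__kvm_riscv_vcpu_power_off(vcpu);
  	spin_unlock(&vcpu->arch.mp_state_lock);

  	return ret;
  }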
@@ -458,17 +476,21 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
 	int ret = 0;
 
+	spin_lock(&vcpu->arch.mp_state_lock);
+
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.power_off = false;
+		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
 		break;
 	case KVM_MP_STATE_STOPPED:
-		kvm_riscv_vcpu_power_off(vcpu);
+		__kvm_riscv_vcpu_power_off(vcpu);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
 	return ret;
 }
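
From userspace, the two paths serialized above are reached through the
KVM_GET_MP_STATE and KVM_SET_MP_STATE ioctls. A minimal VMM-side sketch,
assuming only an open VCPU file descriptor:

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Stop a VCPU; returns 0 on success, -1 with errno set on failure. */
  static int vmm_stop_vcpu(int vcpu_fd)
  {
  	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };

  	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
  }

  /* Query whether a VCPU is stopped; returns 1 or 0, or -1 on error. */
  static int vmm_vcpu_is_stopped(int vcpu_fd)
  {
  	struct kvm_mp_state st;

  	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &st) < 0)
  		return -1;
  	return st.mp_state == KVM_MP_STATE_STOPPED;
  }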
@@ -596,11 +618,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
 			kvm_vcpu_srcu_read_unlock(vcpu);
 			rcuwait_wait_event(wait,
-				(!vcpu->arch.power_off) && (!vcpu->arch.pause),
+				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
 				TASK_INTERRUPTIBLE);
 			kvm_vcpu_srcu_read_lock(vcpu);
 
-			if (vcpu->arch.power_off || vcpu->arch.pause) {
+			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
 				/*
 				 * Awaken to handle a signal, request to
 				 * sleep again later.
arch/riscv/kvm/vcpu_sbi.c

@@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 	unsigned long i;
 	struct kvm_vcpu *tmp;
 
-	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
-		tmp->arch.power_off = true;
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		spin_lock(&tmp->arch.mp_state_lock);
+		WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+		spin_unlock(&tmp->arch.mp_state_lock);
+	}
 	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
 
 	memset(&run->system_event, 0, sizeof(run->system_event));
arch/riscv/kvm/vcpu_sbi_hsm.c

@@ -18,12 +18,18 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
 	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
 	struct kvm_vcpu *target_vcpu;
 	unsigned long target_vcpuid = cp->a0;
+	int ret = 0;
 
 	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
 	if (!target_vcpu)
 		return SBI_ERR_INVALID_PARAM;
-	if (!target_vcpu->arch.power_off)
-		return SBI_ERR_ALREADY_AVAILABLE;
+
+	spin_lock(&target_vcpu->arch.mp_state_lock);
+
+	if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
+		ret = SBI_ERR_ALREADY_AVAILABLE;
+		goto out;
+	}
 
 	reset_cntx = &target_vcpu->arch.guest_reset_context;
 	/* start address */
@@ -34,19 +40,31 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
 	reset_cntx->a1 = cp->a2;
 	kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
 
-	kvm_riscv_vcpu_power_on(target_vcpu);
+	__kvm_riscv_vcpu_power_on(target_vcpu);
 
-	return 0;
+out:
+	spin_unlock(&target_vcpu->arch.mp_state_lock);
+
+	return ret;
 }
 
 static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.power_off)
-		return SBI_ERR_FAILURE;
+	int ret = 0;
 
-	kvm_riscv_vcpu_power_off(vcpu);
+	spin_lock(&vcpu->arch.mp_state_lock);
 
-	return 0;
+	if (kvm_riscv_vcpu_stopped(vcpu)) {
+		ret = SBI_ERR_FAILURE;
+		goto out;
+	}
+
+	__kvm_riscv_vcpu_power_off(vcpu);
+
+out:
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
+	return ret;
 }
 
 static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
@@ -58,7 +76,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
 	target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
 	if (!target_vcpu)
 		return SBI_ERR_INVALID_PARAM;
-	if (!target_vcpu->arch.power_off)
+	if (!kvm_riscv_vcpu_stopped(target_vcpu))
 		return SBI_HSM_STATE_STARTED;
 	else if (vcpu->stat.generic.blocking)
 		return SBI_HSM_STATE_SUSPENDED;
@@ -71,14 +89,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 {
 	int ret = 0;
 	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long funcid = cp->a6;
 
 	switch (funcid) {
 	case SBI_EXT_HSM_HART_START:
-		mutex_lock(&kvm->lock);
 		ret = kvm_sbi_hsm_vcpu_start(vcpu);
-		mutex_unlock(&kvm->lock);
 		break;
 	case SBI_EXT_HSM_HART_STOP:
 		ret = kvm_sbi_hsm_vcpu_stop(vcpu);
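
For reference, the guest reaches kvm_sbi_hsm_vcpu_start() by issuing an
ecall with the HSM extension ID in a7 and the HART_START function ID in
a6, per the SBI v0.2 calling convention. A hedged guest-side sketch
(error handling elided; the function name is illustrative):

  #define SBI_EXT_HSM		0x48534d	/* "HSM" */
  #define SBI_EXT_HSM_HART_START	0

  /* Ask the SBI implementation (here: KVM) to start a stopped hart at
   * start_addr, with 'opaque' handed to it in a1. Returns the SBI
   * error code from a0; 0 on success. */
  static long sbi_hart_start(unsigned long hartid, unsigned long start_addr,
  			   unsigned long opaque)
  {
  	register unsigned long a0 asm("a0") = hartid;
  	register unsigned long a1 asm("a1") = start_addr;
  	register unsigned long a2 asm("a2") = opaque;
  	register unsigned long a6 asm("a6") = SBI_EXT_HSM_HART_START;
  	register unsigned long a7 asm("a7") = SBI_EXT_HSM;

  	asm volatile("ecall"
  		     : "+r" (a0), "+r" (a1)
  		     : "r" (a2), "r" (a6), "r" (a7)
  		     : "memory");

  	return a0;	/* e.g. SBI_ERR_ALREADY_AVAILABLE if running */
  }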