Commit 81554693 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "s390:
   - SRCU fix

  PPC:
   - host crash fixes

  x86:
   - bugfixes, including making nested posted interrupts really work

  Generic:
   - tweaks to kvm_stat and to uevents"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: LAPIC: Fix reentrancy issues with preempt notifiers
  tools/kvm_stat: add '-f help' to get the available event list
  tools/kvm_stat: use variables instead of hard paths in help output
  KVM: nVMX: Fix loss of L2's NMI blocking state
  KVM: nVMX: Fix posted intr delivery when vcpu is in guest mode
  x86: irq: Define a global vector for nested posted interrupts
  KVM: x86: do mask out upper bits of PAE CR3
  KVM: make pid available for uevents without debugfs
  KVM: s390: take srcu lock when getting/setting storage keys
  KVM: VMX: remove unused field
  KVM: PPC: Book3S HV: Fix host crash on changing HPT size
  KVM: PPC: Book3S HV: Enable TM before accessing TM registers
parents 8562e89e 7b5e0a4e
......@@ -164,8 +164,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
goto out;
}
if (kvm->arch.hpt.virt)
if (kvm->arch.hpt.virt) {
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_rmap_reset(kvm);
}
err = kvmppc_allocate_hpt(&info, order);
if (err < 0)
......
......@@ -3211,6 +3211,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
run->fail_entry.hardware_entry_failure_reason = 0;
return -EINVAL;
}
/* Enable TM so we can read the TM SPRs */
mtmsr(mfmsr() | MSR_TM);
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
......
......@@ -1324,7 +1324,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
int i, r = 0;
int srcu_idx, i, r = 0;
if (args->flags != 0)
return -EINVAL;
......@@ -1342,6 +1342,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
return -ENOMEM;
down_read(&current->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
......@@ -1353,6 +1354,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
break;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&current->mm->mmap_sem);
if (!r) {
......@@ -1370,7 +1372,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
int i, r = 0;
int srcu_idx, i, r = 0;
if (args->flags != 0)
return -EINVAL;
......@@ -1396,6 +1398,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
goto out;
down_read(&current->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
......@@ -1413,6 +1416,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
break;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&current->mm->mmap_sem);
out:
kvfree(keys);
......
......@@ -705,6 +705,7 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
apicinterrupt3 POSTED_INTR_VECTOR kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
apicinterrupt3 POSTED_INTR_WAKEUP_VECTOR kvm_posted_intr_wakeup_ipi smp_kvm_posted_intr_wakeup_ipi
apicinterrupt3 POSTED_INTR_NESTED_VECTOR kvm_posted_intr_nested_ipi smp_kvm_posted_intr_nested_ipi
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
......
......@@ -25,6 +25,8 @@ BUILD_INTERRUPT3(kvm_posted_intr_ipi, POSTED_INTR_VECTOR,
smp_kvm_posted_intr_ipi)
BUILD_INTERRUPT3(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR,
smp_kvm_posted_intr_wakeup_ipi)
BUILD_INTERRUPT3(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR,
smp_kvm_posted_intr_nested_ipi)
#endif
/*
......
......@@ -15,6 +15,7 @@ typedef struct {
#ifdef CONFIG_HAVE_KVM
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
unsigned int kvm_posted_intr_nested_ipis;
#endif
unsigned int x86_platform_ipis; /* arch dependent */
unsigned int apic_perf_irqs;
......
......@@ -30,6 +30,7 @@ extern asmlinkage void apic_timer_interrupt(void);
extern asmlinkage void x86_platform_ipi(void);
extern asmlinkage void kvm_posted_intr_ipi(void);
extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
......@@ -62,6 +63,7 @@ extern void trace_call_function_single_interrupt(void);
#define trace_reboot_interrupt reboot_interrupt
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
#define trace_kvm_posted_intr_nested_ipi kvm_posted_intr_nested_ipi
#endif /* CONFIG_TRACING */
#ifdef CONFIG_X86_LOCAL_APIC
......
......@@ -83,7 +83,6 @@
*/
#define X86_PLATFORM_IPI_VECTOR 0xf7
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
/*
* IRQ work vector:
*/
......@@ -98,6 +97,8 @@
/* Vector for KVM to deliver posted interrupt IPI */
#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
#endif
/*
......
......@@ -155,6 +155,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
seq_puts(p, " Posted-interrupt notification event\n");
seq_printf(p, "%*s: ", prec, "NPI");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
irq_stats(j)->kvm_posted_intr_nested_ipis);
seq_puts(p, " Nested posted-interrupt event\n");
seq_printf(p, "%*s: ", prec, "PIW");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
......@@ -313,6 +319,19 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
exiting_irq();
set_irq_regs(old_regs);
}
/*
* Handler for POSTED_INTERRUPT_NESTED_VECTOR.
*/
__visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
inc_irq_stat(kvm_posted_intr_nested_ipis);
exiting_irq();
set_irq_regs(old_regs);
}
#endif
__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
......
......@@ -150,6 +150,8 @@ static void __init apic_intr_init(void)
alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
/* IPI for KVM to deliver interrupt to wake up tasks */
alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
/* IPI for KVM to deliver nested posted interrupt */
alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
#endif
/* IPI vectors for APIC spurious and error interrupts */
......
......@@ -1495,11 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
static void cancel_hv_timer(struct kvm_lapic *apic)
{
WARN_ON(preemptible());
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
preempt_disable();
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
preempt_enable();
}
static bool start_hv_timer(struct kvm_lapic *apic)
......@@ -1507,6 +1506,7 @@ static bool start_hv_timer(struct kvm_lapic *apic)
struct kvm_timer *ktimer = &apic->lapic_timer;
int r;
WARN_ON(preemptible());
if (!kvm_x86_ops->set_hv_timer)
return false;
......@@ -1538,6 +1538,8 @@ static bool start_hv_timer(struct kvm_lapic *apic)
static void start_sw_timer(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
WARN_ON(preemptible());
if (apic->lapic_timer.hv_timer_in_use)
cancel_hv_timer(apic);
if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
......@@ -1552,15 +1554,20 @@ static void start_sw_timer(struct kvm_lapic *apic)
static void restart_apic_timer(struct kvm_lapic *apic)
{
preempt_disable();
if (!start_hv_timer(apic))
start_sw_timer(apic);
preempt_enable();
}
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
WARN_ON(!apic->lapic_timer.hv_timer_in_use);
preempt_disable();
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
WARN_ON(swait_active(&vcpu->wq));
cancel_hv_timer(apic);
apic_timer_expired(apic);
......@@ -1569,6 +1576,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
advance_periodic_target_expiration(apic);
restart_apic_timer(apic);
}
out:
preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
......@@ -1582,9 +1591,11 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
preempt_disable();
/* Possibly the TSC deadline timer is not enabled yet */
if (apic->lapic_timer.hv_timer_in_use)
start_sw_timer(apic);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
......
......@@ -563,7 +563,6 @@ struct vcpu_vmx {
struct kvm_vcpu vcpu;
unsigned long host_rsp;
u8 fail;
bool nmi_known_unmasked;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
......@@ -4988,9 +4987,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
}
}
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
bool nested)
{
#ifdef CONFIG_SMP
int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
if (vcpu->mode == IN_GUEST_MODE) {
struct vcpu_vmx *vmx = to_vmx(vcpu);
......@@ -5008,8 +5010,7 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
*/
WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
return true;
}
#endif
......@@ -5024,7 +5025,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
/* the PIR and ON have been set by L1. */
kvm_vcpu_trigger_posted_interrupt(vcpu);
kvm_vcpu_trigger_posted_interrupt(vcpu, true);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
......@@ -5058,7 +5059,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
if (pi_test_and_set_on(&vmx->pi_desc))
return;
if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
kvm_vcpu_kick(vcpu);
}
......@@ -10041,6 +10042,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->vm_entry_instruction_len);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
vmcs12->guest_interruptibility_info);
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
} else {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
}
......@@ -10065,13 +10068,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
/*
* Note that we use L0's vector here and in
* vmx_deliver_nested_posted_interrupt.
*/
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
vmx->nested.pi_pending = false;
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
} else {
exec_control &= ~PIN_BASED_POSTED_INTR;
}
......@@ -10942,7 +10941,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
*/
vmx_flush_tlb(vcpu);
}
/* Restore posted intr vector. */
if (nested_cpu_has_posted_intr(vmcs12))
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
......
......@@ -597,8 +597,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_avail))
return true;
gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT;
offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1);
gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
PFERR_USER_MASK | PFERR_WRITE_MASK);
if (r < 0)
......
......@@ -445,6 +445,7 @@ struct kvm {
struct kvm_stat_data **debugfs_stat_data;
struct srcu_struct srcu;
struct srcu_struct irq_srcu;
pid_t userspace_pid;
};
#define kvm_err(fmt, ...) \
......
......@@ -474,7 +474,7 @@ class Provider(object):
@staticmethod
def is_field_wanted(fields_filter, field):
"""Indicate whether field is valid according to fields_filter."""
if not fields_filter:
if not fields_filter or fields_filter == "help":
return True
return re.match(fields_filter, field) is not None
......@@ -1413,8 +1413,8 @@ performance.
Requirements:
- Access to:
/sys/kernel/debug/kvm
/sys/kernel/debug/trace/events/*
%s
%s/events/*
/proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
CAP_SYS_ADMIN and perf events are used.
......@@ -1434,7 +1434,7 @@ Interactive Commands:
s set update interval
x toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
"""
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
class PlainHelpFormatter(optparse.IndentedHelpFormatter):
def format_description(self, description):
......@@ -1496,7 +1496,8 @@ Press any other key to refresh statistics immediately.
action='store',
default=DEFAULT_REGEX,
dest='fields',
help='fields to display (regex)',
help='''fields to display (regex)
"-f help" for a list of available events''',
)
optparser.add_option('-p', '--pid',
action='store',
......@@ -1559,6 +1560,17 @@ def main():
stats = Stats(options)
if options.fields == "help":
event_list = "\n"
s = stats.get()
for key in s.keys():
if key.find('(') != -1:
key = key[0:key.find('(')]
if event_list.find('\n' + key + '\n') == -1:
event_list += key + '\n'
sys.stdout.write(event_list)
return ""
if options.log:
log(stats)
elif not options.once:
......
......@@ -3883,7 +3883,6 @@ static const struct file_operations *stat_fops[] = {
static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
{
struct kobj_uevent_env *env;
char *tmp, *pathbuf = NULL;
unsigned long long created, active;
if (!kvm_dev.this_device || !kvm)
......@@ -3907,38 +3906,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
add_uevent_var(env, "CREATED=%llu", created);
add_uevent_var(env, "COUNT=%llu", active);
if (type == KVM_EVENT_CREATE_VM)
if (type == KVM_EVENT_CREATE_VM) {
add_uevent_var(env, "EVENT=create");
else if (type == KVM_EVENT_DESTROY_VM)
kvm->userspace_pid = task_pid_nr(current);
} else if (type == KVM_EVENT_DESTROY_VM) {
add_uevent_var(env, "EVENT=destroy");
}
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
if (kvm->debugfs_dentry) {
char p[ITOA_MAX_LEN];
snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
tmp = strchrnul(p + 1, '-');
*tmp = '\0';
add_uevent_var(env, "PID=%s", p);
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
if (pathbuf) {
/* sizeof counts the final '\0' */
int len = sizeof("STATS_PATH=") - 1;
const char *pvar = "STATS_PATH=";
tmp = dentry_path_raw(kvm->debugfs_dentry,
pathbuf + len,
PATH_MAX - len);
if (!IS_ERR(tmp)) {
memcpy(tmp - len, pvar, len);
env->envp[env->envp_idx++] = tmp - len;
}
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
if (p) {
tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
if (!IS_ERR(tmp))
add_uevent_var(env, "STATS_PATH=%s", tmp);
kfree(p);
}
}
/* no need for checks, since we are adding at most only 5 keys */
env->envp[env->envp_idx++] = NULL;
kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
kfree(env);
kfree(pathbuf);
}
static int kvm_init_debug(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment