Commit c68dc1b5 authored by Oliver Upton's avatar Oliver Upton Committed by Paolo Bonzini

KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK

Handling the migration of TSCs correctly is difficult, in part because
Linux does not provide userspace with the ability to retrieve a (TSC,
realtime) clock pair for a single instant in time. In lieu of a more
convenient facility, KVM can report similar information in the kvm_clock
structure.

Provide userspace with a host TSC & realtime pair iff the realtime clock
is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid
realtime value, advance the KVM clock by the amount of elapsed time. Do
not step the KVM clock backwards, though, as it is a monotonic
oscillator.
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210916181538.968978-5-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 3d5e7a28
...@@ -1010,20 +1010,37 @@ such as migration. ...@@ -1010,20 +1010,37 @@ such as migration.
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
set of bits that KVM can return in struct kvm_clock_data's flag member. set of bits that KVM can return in struct kvm_clock_data's flag member.
The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned The following flags are defined:
value is the exact kvmclock value seen by all VCPUs at the instant
when KVM_GET_CLOCK was called. If clear, the returned value is simply KVM_CLOCK_TSC_STABLE
CLOCK_MONOTONIC plus a constant offset; the offset can be modified If set, the returned value is the exact kvmclock
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock, value seen by all VCPUs at the instant when KVM_GET_CLOCK was called.
but the exact value read by each VCPU could differ, because the host If clear, the returned value is simply CLOCK_MONOTONIC plus a constant
TSC is not stable. offset; the offset can be modified with KVM_SET_CLOCK. KVM will try
to make all VCPUs follow this clock, but the exact value read by each
VCPU could differ, because the host TSC is not stable.
KVM_CLOCK_REALTIME
If set, the `realtime` field in the kvm_clock_data
structure is populated with the value of the host's real time
clocksource at the instant when KVM_GET_CLOCK was called. If clear,
the `realtime` field does not contain a value.
KVM_CLOCK_HOST_TSC
If set, the `host_tsc` field in the kvm_clock_data
structure is populated with the value of the host's timestamp counter (TSC)
at the instant when KVM_GET_CLOCK was called. If clear, the `host_tsc` field
does not contain a value.
:: ::
struct kvm_clock_data { struct kvm_clock_data {
__u64 clock; /* kvmclock current value */ __u64 clock; /* kvmclock current value */
__u32 flags; __u32 flags;
__u32 pad[9]; __u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
}; };
...@@ -1040,12 +1057,25 @@ Sets the current timestamp of kvmclock to the value specified in its parameter. ...@@ -1040,12 +1057,25 @@ Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration. such as migration.
The following flags can be passed:
KVM_CLOCK_REALTIME
If set, KVM will compare the value of the `realtime` field
with the value of the host's real time clocksource at the instant when
KVM_SET_CLOCK was called. The difference in elapsed time is added to the final
kvmclock value that will be provided to guests.
Other flags returned by ``KVM_GET_CLOCK`` are accepted but ignored.
:: ::
struct kvm_clock_data { struct kvm_clock_data {
__u64 clock; /* kvmclock current value */ __u64 clock; /* kvmclock current value */
__u32 flags; __u32 flags;
__u32 pad[9]; __u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
}; };
......
...@@ -1942,4 +1942,7 @@ int kvm_cpu_dirty_log_size(void); ...@@ -1942,4 +1942,7 @@ int kvm_cpu_dirty_log_size(void);
int alloc_all_memslots_rmaps(struct kvm *kvm); int alloc_all_memslots_rmaps(struct kvm *kvm);
#define KVM_CLOCK_VALID_FLAGS \
(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
#endif /* _ASM_X86_KVM_HOST_H */ #endif /* _ASM_X86_KVM_HOST_H */
...@@ -2787,6 +2787,7 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) ...@@ -2787,6 +2787,7 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
struct pvclock_vcpu_time_info hv_clock; struct pvclock_vcpu_time_info hv_clock;
unsigned long flags; unsigned long flags;
data->flags = 0;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) { if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
...@@ -2803,10 +2804,20 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) ...@@ -2803,10 +2804,20 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
get_cpu(); get_cpu();
if (__this_cpu_read(cpu_tsc_khz)) { if (__this_cpu_read(cpu_tsc_khz)) {
#ifdef CONFIG_X86_64
struct timespec64 ts;
if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
} else
#endif
data->host_tsc = rdtsc();
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift, &hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul); &hv_clock.tsc_to_system_mul);
data->clock = __pvclock_read_cycles(&hv_clock, rdtsc()); data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
} else { } else {
data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset; data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
} }
...@@ -2818,12 +2829,6 @@ u64 get_kvmclock_ns(struct kvm *kvm) ...@@ -2818,12 +2829,6 @@ u64 get_kvmclock_ns(struct kvm *kvm)
{ {
struct kvm_clock_data data; struct kvm_clock_data data;
/*
* Zero flags as it's accessed RMW, leave everything else uninitialized
* as clock is always written and no other fields are consumed.
*/
data.flags = 0;
get_kvmclock(kvm, &data); get_kvmclock(kvm, &data);
return data.clock; return data.clock;
} }
...@@ -4050,7 +4055,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -4050,7 +4055,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_SYNC_X86_VALID_FIELDS; r = KVM_SYNC_X86_VALID_FIELDS;
break; break;
case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE; r = KVM_CLOCK_VALID_FLAGS;
break; break;
case KVM_CAP_X86_DISABLE_EXITS: case KVM_CAP_X86_DISABLE_EXITS:
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
...@@ -5847,12 +5852,16 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) ...@@ -5847,12 +5852,16 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
{ {
struct kvm_arch *ka = &kvm->arch; struct kvm_arch *ka = &kvm->arch;
struct kvm_clock_data data; struct kvm_clock_data data;
u64 now_ns; u64 now_raw_ns;
if (copy_from_user(&data, argp, sizeof(data))) if (copy_from_user(&data, argp, sizeof(data)))
return -EFAULT; return -EFAULT;
if (data.flags) /*
* Only KVM_CLOCK_REALTIME is used, but allow passing the
* result of KVM_GET_CLOCK back to KVM_SET_CLOCK.
*/
if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
return -EINVAL; return -EINVAL;
kvm_hv_invalidate_tsc_page(kvm); kvm_hv_invalidate_tsc_page(kvm);
...@@ -5866,11 +5875,21 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) ...@@ -5866,11 +5875,21 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
* is slightly ahead) here we risk going negative on unsigned * is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'data.clock' is very small. * 'system_time' when 'data.clock' is very small.
*/ */
if (kvm->arch.use_master_clock) if (data.flags & KVM_CLOCK_REALTIME) {
now_ns = ka->master_kernel_ns; u64 now_real_ns = ktime_get_real_ns();
/*
* Avoid stepping the kvmclock backwards.
*/
if (now_real_ns > data.realtime)
data.clock += now_real_ns - data.realtime;
}
if (ka->use_master_clock)
now_raw_ns = ka->master_kernel_ns;
else else
now_ns = get_kvmclock_base_ns(); now_raw_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = data.clock - now_ns; ka->kvmclock_offset = data.clock - now_raw_ns;
kvm_end_pvclock_update(kvm); kvm_end_pvclock_update(kvm);
return 0; return 0;
} }
......
...@@ -1231,11 +1231,16 @@ struct kvm_irqfd { ...@@ -1231,11 +1231,16 @@ struct kvm_irqfd {
/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
#define KVM_CLOCK_TSC_STABLE 2 #define KVM_CLOCK_TSC_STABLE 2
#define KVM_CLOCK_REALTIME (1 << 2)
#define KVM_CLOCK_HOST_TSC (1 << 3)
struct kvm_clock_data { struct kvm_clock_data {
__u64 clock; __u64 clock;
__u32 flags; __u32 flags;
__u32 pad[9]; __u32 pad0;
__u64 realtime;
__u64 host_tsc;
__u32 pad[4];
}; };
/* For KVM_CAP_SW_TLB */ /* For KVM_CAP_SW_TLB */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment