Commit 239451e9 authored by Linus Torvalds

Merge tag 'for-linus-6.3-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull xen updates from Juergen Gross:

 - help deprecate the /proc/xen files by making the related information
   available via sysfs

 - mark the Xen variants of play_dead "noreturn"

 - support a shared Xen platform interrupt

 - several small cleanups and fixes

* tag 'for-linus-6.3-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: sysfs: make kobj_type structure constant
  x86/Xen: drop leftover VM-assist uses
  xen: Replace one-element array with flexible-array member
  xen/grant-dma-iommu: Implement a dummy probe_device() callback
  xen/pvcalls-back: fix permanently masked event channel
  xen: Allow platform PCI interrupt to be shared
  x86/xen/time: prefer tsc as clocksource when it is invariant
  x86/xen: mark xen_pv_play_dead() as __noreturn
  x86/xen: don't let xen_pv_play_dead() return
  drivers/xen/hypervisor: Expose Xen SIF flags to userspace
parents b8878e5a 4ecc96cb
......@@ -120,3 +120,16 @@ Contact: xen-devel@lists.xenproject.org
Description: If running under Xen:
The Xen version is in the format <major>.<minor><extra>
This is the <minor> part of it.
What: /sys/hypervisor/start_flags/*
Date: March 2023
KernelVersion: 6.3.0
Contact: xen-devel@lists.xenproject.org
Description: If running under Xen:
All bits in Xen's start-flags are represented as
boolean files, returning '1' if set, '0' otherwise.
This takes the place of the defunct /proc/xen/capabilities,
which would contain "control_d" on dom0, and be empty
otherwise. This flag is now exposed as "initdomain" in
addition to the "privileged" flag; all other possible flags
are accessible as "unknownXX".
......@@ -934,12 +934,8 @@ void xen_enable_syscall(void)
static void __init xen_pvmmu_arch_setup(void)
{
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event,
xen_asm_exc_xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
......
......@@ -21,6 +21,8 @@ void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
void __noreturn xen_cpu_bringup_again(unsigned long stack);
struct xen_common_irq {
int irq;
char *name;
......
......@@ -381,21 +381,12 @@ static void xen_pv_cpu_die(unsigned int cpu)
}
}
static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
cpu_bringup();
/*
* commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down)
* clears certain data that the cpu_idle loop (which called us
* and that we return from) expects. The only way to get that
* data back is to call:
*/
tick_nohz_idle_enter();
tick_nohz_idle_stop_tick_protected();
cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
xen_cpu_bringup_again((unsigned long)task_pt_regs(current));
BUG();
}
#else /* !CONFIG_HOTPLUG_CPU */
......@@ -409,7 +400,7 @@ static void xen_pv_cpu_die(unsigned int cpu)
BUG();
}
static void xen_pv_play_dead(void)
static void __noreturn xen_pv_play_dead(void)
{
BUG();
}
......
......@@ -482,15 +482,51 @@ static void xen_setup_vsyscall_time_info(void)
xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
/*
 * Decide whether the TSC may be preferred over the Xen clocksource.
 *
 * Returns 1 only when all of the following hold, 0 otherwise:
 *  - the boot CPU reports X86_FEATURE_CONSTANT_TSC and
 *    X86_FEATURE_NONSTOP_TSC,
 *  - check_tsc_unstable() does not flag the TSC, and
 *  - EBX of the Xen CPUID leaf at xen_cpuid_base() + 3, sub-leaf 0,
 *    equals 2 (tsc_mode = no_emulate).
 */
static int __init xen_tsc_safe_clocksource(void)
{
	u32 eax, ebx, ecx, edx;

	/* Any one of these failing rules the TSC out before CPUID is queried. */
	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    check_tsc_unstable())
		return 0;

	/* Leaf 4, sub-leaf 0 (0x40000x03) */
	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);

	/* tsc_mode = no_emulate (2) */
	return ebx == 2;
}
static void __init xen_time_init(void)
{
struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
struct timespec64 tp;
/* As Dom0 is never moved, no penalty on using TSC there */
/*
* As Dom0 is never moved, no penalty on using TSC there.
*
* If it is possible for the guest to determine that the tsc is a safe
* clocksource, then set xen_clocksource rating below that of the tsc
* so that the system prefers tsc instead.
*/
if (xen_initial_domain())
xen_clocksource.rating = 275;
else if (xen_tsc_safe_clocksource())
xen_clocksource.rating = 299;
clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
......
......@@ -76,6 +76,13 @@ SYM_CODE_START(asm_cpu_bringup_and_idle)
call cpu_bringup_and_idle
SYM_CODE_END(asm_cpu_bringup_and_idle)
/*
 * xen_cpu_bringup_again: load the value passed in %rdi into %rsp, then
 * call cpu_bringup_and_idle on that stack. There is no code after the
 * call, so this routine has no return path of its own.
 */
SYM_CODE_START(xen_cpu_bringup_again)
UNWIND_HINT_FUNC
/* Switch the stack pointer to the address supplied by the caller. */
mov %rdi, %rsp
UNWIND_HINT_REGS
call cpu_bringup_and_idle
SYM_CODE_END(xen_cpu_bringup_again)
.popsection
#endif
#endif
......
......@@ -1710,9 +1710,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
generic_handle_irq(irq);
}
static void __xen_evtchn_do_upcall(void)
static int __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
int cpu = smp_processor_id();
struct evtchn_loop_ctrl ctrl = { 0 };
......@@ -1737,6 +1738,8 @@ static void __xen_evtchn_do_upcall(void)
* above.
*/
__this_cpu_inc(irq_epoch);
return ret;
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
......@@ -1751,9 +1754,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
set_irq_regs(old_regs);
}
void xen_hvm_evtchn_do_upcall(void)
int xen_hvm_evtchn_do_upcall(void)
{
__xen_evtchn_do_upcall();
return __xen_evtchn_do_upcall();
}
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
......
......@@ -16,8 +16,15 @@ struct grant_dma_iommu_device {
struct iommu_device iommu;
};
/* Nothing is really needed here */
static const struct iommu_ops grant_dma_iommu_ops;
/*
 * Dummy probe_device() callback: it never inspects @dev and always
 * returns ERR_PTR(-ENODEV).
 */
static struct iommu_device *grant_dma_iommu_probe_device(struct device *dev)
{
return ERR_PTR(-ENODEV);
}
/* Nothing is really needed here except a dummy probe_device callback */
static const struct iommu_ops grant_dma_iommu_ops = {
.probe_device = grant_dma_iommu_probe_device,
};
static const struct of_device_id grant_dma_iommu_of_match[] = {
{ .compatible = "xen,grant-dma" },
......
......@@ -64,14 +64,13 @@ static uint64_t get_callback_via(struct pci_dev *pdev)
static irqreturn_t do_hvm_evtchn_intr(int irq, void *dev_id)
{
xen_hvm_evtchn_do_upcall();
return IRQ_HANDLED;
return xen_hvm_evtchn_do_upcall();
}
static int xen_allocate_irq(struct pci_dev *pdev)
{
return request_irq(pdev->irq, do_hvm_evtchn_intr,
IRQF_NOBALANCING | IRQF_TRIGGER_RISING,
IRQF_NOBALANCING | IRQF_SHARED,
"xen-platform-pci", pdev);
}
......
......@@ -173,6 +173,8 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map)
RING_IDX cons, prod, size, array_size;
int ret;
atomic_set(&map->write, 0);
cons = intf->out_cons;
prod = intf->out_prod;
/* read the indexes before dealing with the data */
......@@ -197,7 +199,6 @@ static bool pvcalls_conn_back_write(struct sock_mapping *map)
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 2, size);
}
atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
if (ret == -EAGAIN) {
atomic_inc(&map->write);
......
......@@ -31,7 +31,10 @@ struct hyp_sysfs_attr {
struct attribute attr;
ssize_t (*show)(struct hyp_sysfs_attr *, char *);
ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
void *hyp_attr_data;
union {
void *hyp_attr_data;
unsigned long hyp_attr_value;
};
};
static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
......@@ -399,6 +402,60 @@ static int __init xen_sysfs_properties_init(void)
return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
}
#define FLAG_UNAME "unknown"
#define FLAG_UNAME_FMT FLAG_UNAME "%02u"
#define FLAG_UNAME_MAX sizeof(FLAG_UNAME "XX")
#define FLAG_COUNT (sizeof(xen_start_flags) * BITS_PER_BYTE)
static_assert(sizeof(xen_start_flags) <=
sizeof_field(struct hyp_sysfs_attr, hyp_attr_value));
/*
 * Show callback for a start-flag attribute.
 *
 * Writes '1' if any bit of attr->hyp_attr_value is set in xen_start_flags,
 * '0' otherwise, followed by a newline. No NUL terminator is written.
 *
 * Returns the number of bytes stored in @buffer, which is always 2.
 */
static ssize_t flag_show(struct hyp_sysfs_attr *attr, char *buffer)
{
	buffer[0] = (xen_start_flags & attr->hyp_attr_value) ? '1' : '0';
	buffer[1] = '\n';

	return 2;
}
/*
 * Designated initializer for the array slot at index ilog2(flag): a 0444
 * attribute whose name is the stringified @node, shown through flag_show(),
 * with @flag stored as its hyp_attr_value.
 */
#define FLAG_NODE(flag, node) \
[ilog2(flag)] = { \
.attr = { .name = #node, .mode = 0444 },\
.show = flag_show, \
.hyp_attr_value = flag \
}
/*
* Add new, known flags here. No other changes are required, but
* note that each known flag wastes one entry in flag_unames[].
* The code/complexity machinations to avoid this aren't worth it
* for a few entries, but keep it in mind.
*/
static struct hyp_sysfs_attr flag_attrs[FLAG_COUNT] = {
FLAG_NODE(SIF_PRIVILEGED, privileged),
FLAG_NODE(SIF_INITDOMAIN, initdomain)
};
static struct attribute_group xen_flags_group = {
.name = "start_flags",
.attrs = (struct attribute *[FLAG_COUNT + 1]){}
};
static char flag_unames[FLAG_COUNT][FLAG_UNAME_MAX];
/*
 * Populate and register the "start_flags" sysfs group.
 *
 * Each bit position of xen_start_flags gets one attribute. Slots of
 * flag_attrs[] that have no name yet are filled in here as read-only
 * "unknownNN" files (NN = bit number) backed by flag_show(). Every entry
 * is then linked into xen_flags_group.attrs[]; the extra trailing slot of
 * that array is never written by this function.
 *
 * Returns the result of sysfs_create_group().
 */
static int __init xen_sysfs_flags_init(void)
{
	for (unsigned int fnum = 0; fnum != FLAG_COUNT; fnum++) {
		struct hyp_sysfs_attr *fattr = &flag_attrs[fnum];

		if (likely(fattr->attr.name == NULL)) {
			/* Bounded by the row size of flag_unames[]. */
			snprintf(flag_unames[fnum], sizeof(flag_unames[fnum]),
				 FLAG_UNAME_FMT, fnum);
			fattr->attr.name = flag_unames[fnum];
			fattr->attr.mode = 0444;
			fattr->show = flag_show;
			/*
			 * Shift an unsigned long rather than an int: with
			 * fnum == 31, "1 << fnum" would shift into the sign
			 * bit of a signed int and then be sign-extended when
			 * widened to hyp_attr_value.
			 */
			fattr->hyp_attr_value = 1UL << fnum;
		}
		xen_flags_group.attrs[fnum] = &fattr->attr;
	}

	return sysfs_create_group(hypervisor_kobj, &xen_flags_group);
}
#ifdef CONFIG_XEN_HAVE_VPMU
struct pmu_mode {
const char *name;
......@@ -539,18 +596,22 @@ static int __init hyper_sysfs_init(void)
ret = xen_sysfs_properties_init();
if (ret)
goto prop_out;
ret = xen_sysfs_flags_init();
if (ret)
goto flags_out;
#ifdef CONFIG_XEN_HAVE_VPMU
if (xen_initial_domain()) {
ret = xen_sysfs_pmu_init();
if (ret) {
sysfs_remove_group(hypervisor_kobj,
&xen_properties_group);
goto prop_out;
sysfs_remove_group(hypervisor_kobj, &xen_flags_group);
goto flags_out;
}
}
#endif
goto out;
flags_out:
sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
prop_out:
sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
uuid_out:
......@@ -594,7 +655,7 @@ static const struct sysfs_ops hyp_sysfs_ops = {
.store = hyp_sysfs_store,
};
static struct kobj_type hyp_sysfs_kobj_type = {
static const struct kobj_type hyp_sysfs_kobj_type = {
.sysfs_ops = &hyp_sysfs_ops,
};
......
......@@ -30,7 +30,7 @@
struct xen_page_directory {
grant_ref_t gref_dir_next_page;
#define XEN_GREF_LIST_END 0
grant_ref_t gref[1]; /* Variable length */
grant_ref_t gref[]; /* Variable length */
};
/**
......
......@@ -107,7 +107,7 @@ evtchn_port_t evtchn_from_irq(unsigned irq);
int xen_set_callback_via(uint64_t via);
void xen_evtchn_do_upcall(struct pt_regs *regs);
void xen_hvm_evtchn_do_upcall(void);
int xen_hvm_evtchn_do_upcall(void);
/* Bind a pirq for a physical interrupt to an irq. */
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
......
......@@ -186,6 +186,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"snp_abort",
"stop_this_cpu",
"usercopy_abort",
"xen_cpu_bringup_again",
"xen_start_kernel",
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment