Commit 04471d3f authored by Linus Torvalds

Merge tag 'for-linux-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml

Pull UML updates from Richard Weinberger:

 - Many cleanups and fixes for our virtio code

 - Add support for a pseudo RTC

 - Fix for a possible jailbreak

 - Minor fixes (spelling, header files)

* tag 'for-linux-5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: irq.h: include <asm-generic/irq.h>
  um: io.h: include <linux/types.h>
  um: add a pseudo RTC
  um: remove process stub VMA
  um: rework userspace stubs to not hard-code stub location
  um: separate child and parent errors in clone stub
  um: defer killing userspace on page table update failures
  um: mm: check more comprehensively for stub changes
  um: print register names in wait_for_stub
  um: hostfs: use a kmem cache for inodes
  mm: Remove arch_remap() and mm-arch-hooks.h
  um: fix spelling mistake in Kconfig "privleges" -> "privileges"
  um: virtio: allow devices to be configured for wakeup
  um: time-travel: rework interrupt handling in ext mode
  um: virtio: disable VQs during suspend
  um: virtio: fix handling of messages without payload
  um: virtio: clean up a comment
Parents: df24212a ddad5187
@@ -323,7 +323,7 @@ config UML_NET_SLIRP
frames. In general, slirp allows the UML the same IP connectivity
to the outside world that the host user is permitted, and unlike
other transports, SLiRP works without the need of root level
privleges, setuid binaries, or SLIP devices on the host. This
privileges, setuid binaries, or SLIP devices on the host. This
also means not every type of connection is possible, but most
situations can be accommodated with carefully crafted slirp
commands that can be passed along as part of the network device's
@@ -346,3 +346,14 @@ config VIRTIO_UML
help
This driver provides support for virtio based paravirtual device
drivers over vhost-user sockets.
config UML_RTC
bool "UML RTC driver"
depends on RTC_CLASS
# there's no use in this if PM_SLEEP isn't enabled ...
depends on PM_SLEEP
help
When PM_SLEEP is configured, it may be desirable to wake up using
rtcwake, especially in time-travel mode. This driver enables that
by providing a fake RTC clock that causes a wakeup at the right
time.
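
For example (a hedged illustration, not part of this patch set, assuming a guest image that ships util-linux), a time-travel UML instance can then suspend itself and be woken 30 seconds later from inside the guest with:

    rtcwake -m mem -s 30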
@@ -17,6 +17,7 @@ hostaudio-objs := hostaudio_kern.o
ubd-objs := ubd_kern.o ubd_user.o
port-objs := port_kern.o port_user.o
harddog-objs := harddog_kern.o harddog_user.o
rtc-objs := rtc_kern.o rtc_user.o
LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
@@ -62,6 +63,7 @@ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o
obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
obj-$(CONFIG_UML_RTC) += rtc.o
# pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#ifndef __UM_RTC_H__
#define __UM_RTC_H__
int uml_rtc_start(bool timetravel);
int uml_rtc_enable_alarm(unsigned long long delta_seconds);
void uml_rtc_disable_alarm(void);
void uml_rtc_stop(bool timetravel);
void uml_rtc_send_timetravel_alarm(void);
#endif /* __UM_RTC_H__ */
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <linux/platform_device.h>
#include <linux/time-internal.h>
#include <linux/suspend.h>
#include <linux/err.h>
#include <linux/rtc.h>
#include <kern_util.h>
#include <irq_kern.h>
#include <os.h>
#include "rtc.h"
static time64_t uml_rtc_alarm_time;
static bool uml_rtc_alarm_enabled;
static struct rtc_device *uml_rtc;
static int uml_rtc_irq_fd, uml_rtc_irq;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void uml_rtc_time_travel_alarm(struct time_travel_event *ev)
{
uml_rtc_send_timetravel_alarm();
}
static struct time_travel_event uml_rtc_alarm_event = {
.fn = uml_rtc_time_travel_alarm,
};
#endif
static int uml_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct timespec64 ts;
/* Use this to get correct time in time-travel mode */
read_persistent_clock64(&ts);
rtc_time64_to_tm(timespec64_to_ktime(ts) / NSEC_PER_SEC, tm);
return 0;
}
static int uml_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
rtc_time64_to_tm(uml_rtc_alarm_time, &alrm->time);
alrm->enabled = uml_rtc_alarm_enabled;
return 0;
}
static int uml_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
{
unsigned long long secs;
if (!enable && !uml_rtc_alarm_enabled)
return 0;
uml_rtc_alarm_enabled = enable;
secs = uml_rtc_alarm_time - ktime_get_real_seconds();
if (time_travel_mode == TT_MODE_OFF) {
if (!enable) {
uml_rtc_disable_alarm();
return 0;
}
/* enable or update */
return uml_rtc_enable_alarm(secs);
} else {
time_travel_del_event(&uml_rtc_alarm_event);
if (enable)
time_travel_add_event_rel(&uml_rtc_alarm_event,
secs * NSEC_PER_SEC);
}
return 0;
}
static int uml_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
uml_rtc_alarm_irq_enable(dev, 0);
uml_rtc_alarm_time = rtc_tm_to_time64(&alrm->time);
uml_rtc_alarm_irq_enable(dev, alrm->enabled);
return 0;
}
static const struct rtc_class_ops uml_rtc_ops = {
.read_time = uml_rtc_read_time,
.read_alarm = uml_rtc_read_alarm,
.alarm_irq_enable = uml_rtc_alarm_irq_enable,
.set_alarm = uml_rtc_set_alarm,
};
static irqreturn_t uml_rtc_interrupt(int irq, void *data)
{
unsigned long long c = 0;
/* alarm triggered, it's now off */
uml_rtc_alarm_enabled = false;
os_read_file(uml_rtc_irq_fd, &c, sizeof(c));
WARN_ON(c == 0);
pm_system_wakeup();
rtc_update_irq(uml_rtc, 1, RTC_IRQF | RTC_AF);
return IRQ_HANDLED;
}
static int uml_rtc_setup(void)
{
int err;
err = uml_rtc_start(time_travel_mode != TT_MODE_OFF);
if (WARN(err < 0, "err = %d\n", err))
return err;
uml_rtc_irq_fd = err;
err = um_request_irq(UM_IRQ_ALLOC, uml_rtc_irq_fd, IRQ_READ,
uml_rtc_interrupt, 0, "rtc", NULL);
if (err < 0) {
uml_rtc_stop(time_travel_mode != TT_MODE_OFF);
return err;
}
irq_set_irq_wake(err, 1);
uml_rtc_irq = err;
return 0;
}
static void uml_rtc_cleanup(void)
{
um_free_irq(uml_rtc_irq, NULL);
uml_rtc_stop(time_travel_mode != TT_MODE_OFF);
}
static int uml_rtc_probe(struct platform_device *pdev)
{
int err;
err = uml_rtc_setup();
if (err)
return err;
uml_rtc = devm_rtc_allocate_device(&pdev->dev);
if (IS_ERR(uml_rtc)) {
err = PTR_ERR(uml_rtc);
goto cleanup;
}
uml_rtc->ops = &uml_rtc_ops;
device_init_wakeup(&pdev->dev, 1);
err = devm_rtc_register_device(uml_rtc);
if (err)
goto cleanup;
return 0;
cleanup:
uml_rtc_cleanup();
return err;
}
static int uml_rtc_remove(struct platform_device *pdev)
{
device_init_wakeup(&pdev->dev, 0);
uml_rtc_cleanup();
return 0;
}
static struct platform_driver uml_rtc_driver = {
.probe = uml_rtc_probe,
.remove = uml_rtc_remove,
.driver = {
.name = "uml-rtc",
},
};
static int __init uml_rtc_init(void)
{
struct platform_device *pdev;
int err;
err = platform_driver_register(&uml_rtc_driver);
if (err)
return err;
pdev = platform_device_alloc("uml-rtc", 0);
if (!pdev) {
err = -ENOMEM;
goto unregister;
}
err = platform_device_add(pdev);
if (err)
goto unregister;
return 0;
unregister:
platform_device_put(pdev);
platform_driver_unregister(&uml_rtc_driver);
return err;
}
device_initcall(uml_rtc_init);
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <os.h>
#include <errno.h>
#include <sched.h>
#include <unistd.h>
#include <kern_util.h>
#include <sys/select.h>
#include <stdio.h>
#include <sys/timerfd.h>
#include "rtc.h"
static int uml_rtc_irq_fds[2];
void uml_rtc_send_timetravel_alarm(void)
{
unsigned long long c = 1;
CATCH_EINTR(write(uml_rtc_irq_fds[1], &c, sizeof(c)));
}
int uml_rtc_start(bool timetravel)
{
int err;
if (timetravel) {
int err = os_pipe(uml_rtc_irq_fds, 1, 1);
if (err)
goto fail;
} else {
uml_rtc_irq_fds[0] = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC);
if (uml_rtc_irq_fds[0] < 0) {
err = -errno;
goto fail;
}
/* apparently timerfd won't send SIGIO, use workaround */
sigio_broken(uml_rtc_irq_fds[0]);
err = add_sigio_fd(uml_rtc_irq_fds[0]);
if (err < 0) {
close(uml_rtc_irq_fds[0]);
goto fail;
}
}
return uml_rtc_irq_fds[0];
fail:
uml_rtc_stop(timetravel);
return err;
}
int uml_rtc_enable_alarm(unsigned long long delta_seconds)
{
struct itimerspec it = {
.it_value = {
.tv_sec = delta_seconds,
},
};
if (timerfd_settime(uml_rtc_irq_fds[0], 0, &it, NULL))
return -errno;
return 0;
}
void uml_rtc_disable_alarm(void)
{
uml_rtc_enable_alarm(0);
}
void uml_rtc_stop(bool timetravel)
{
if (timetravel)
os_close_file(uml_rtc_irq_fds[1]);
else
ignore_sigio_fd(uml_rtc_irq_fds[0]);
os_close_file(uml_rtc_irq_fds[0]);
}
@@ -55,16 +55,16 @@ struct virtio_uml_device {
u64 protocol_features;
u8 status;
u8 registered:1;
u8 suspended:1;
u8 config_changed_irq:1;
uint64_t vq_irq_vq_map;
};
struct virtio_uml_vq_info {
int kick_fd, call_fd;
char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
struct virtqueue *vq;
vq_callback_t *callback;
struct time_travel_event defer;
#endif
bool suspended;
};
extern unsigned long long physmem_size, highmem;
@@ -97,6 +97,9 @@ static int full_read(int fd, void *buf, int len, bool abortable)
{
int rc;
if (!len)
return 0;
do {
rc = os_read_file(fd, buf, len);
if (rc > 0) {
@@ -347,9 +350,9 @@ static void vhost_user_reply(struct virtio_uml_device *vu_dev,
rc, size);
}
static irqreturn_t vu_req_interrupt(int irq, void *data)
static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
struct time_travel_event *ev)
{
struct virtio_uml_device *vu_dev = data;
struct virtqueue *vq;
int response = 1;
struct {
@@ -367,14 +370,14 @@ static irqreturn_t vu_req_interrupt(int irq, void *data)
switch (msg.msg.header.request) {
case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
virtio_config_changed(&vu_dev->vdev);
vu_dev->config_changed_irq = true;
response = 0;
break;
case VHOST_USER_SLAVE_VRING_CALL:
virtio_device_for_each_vq((&vu_dev->vdev), vq) {
if (vq->index == msg.msg.payload.vring_state.index) {
response = 0;
vring_interrupt(0 /* ignored */, vq);
vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
break;
}
}
@@ -388,12 +391,45 @@ static irqreturn_t vu_req_interrupt(int irq, void *data)
msg.msg.header.request);
}
if (ev && !vu_dev->suspended)
time_travel_add_irq_event(ev);
if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
vhost_user_reply(vu_dev, &msg.msg, response);
return IRQ_HANDLED;
}
static irqreturn_t vu_req_interrupt(int irq, void *data)
{
struct virtio_uml_device *vu_dev = data;
irqreturn_t ret = IRQ_HANDLED;
if (!um_irq_timetravel_handler_used())
ret = vu_req_read_message(vu_dev, NULL);
if (vu_dev->vq_irq_vq_map) {
struct virtqueue *vq;
virtio_device_for_each_vq((&vu_dev->vdev), vq) {
if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
vring_interrupt(0 /* ignored */, vq);
}
vu_dev->vq_irq_vq_map = 0;
} else if (vu_dev->config_changed_irq) {
virtio_config_changed(&vu_dev->vdev);
vu_dev->config_changed_irq = false;
}
return ret;
}
static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
struct time_travel_event *ev)
{
vu_req_read_message(data, ev);
}
static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
{
int rc, req_fds[2];
@@ -404,9 +440,10 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
return rc;
vu_dev->req_fd = req_fds[0];
rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
vu_req_interrupt, IRQF_SHARED,
vu_dev->pdev->name, vu_dev);
rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
vu_req_interrupt, IRQF_SHARED,
vu_dev->pdev->name, vu_dev,
vu_req_interrupt_comm_handler);
if (rc < 0)
goto err_close;
@@ -722,6 +759,9 @@ static bool vu_notify(struct virtqueue *vq)
const uint64_t n = 1;
int rc;
if (info->suspended)
return true;
time_travel_propagate_time();
if (info->kick_fd < 0) {
@@ -875,23 +915,6 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
return rc;
}
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void vu_defer_irq_handle(struct time_travel_event *d)
{
struct virtio_uml_vq_info *info;
info = container_of(d, struct virtio_uml_vq_info, defer);
info->callback(info->vq);
}
static void vu_defer_irq_callback(struct virtqueue *vq)
{
struct virtio_uml_vq_info *info = vq->priv;
time_travel_add_irq_event(&info->defer);
}
#endif
static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
unsigned index, vq_callback_t *callback,
const char *name, bool ctx)
@@ -911,19 +934,6 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
pdev->id, name);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
* When we get an interrupt, we must bounce it through the simulation
* calendar (the simtime device), except for the simtime device itself
* since that's part of the simulation control.
*/
if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
info->callback = callback;
callback = vu_defer_irq_callback;
time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
}
#endif
vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
ctx, vu_notify, callback, info->name);
if (!vq) {
@@ -932,9 +942,6 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
}
vq->priv = info;
num = virtqueue_get_vring_size(vq);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
info->vq = vq;
#endif
if (vu_dev->protocol_features &
BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
@@ -993,6 +1000,10 @@ static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
int i, queue_idx = 0, rc;
struct virtqueue *vq;
/* not supported for now */
if (WARN_ON(nvqs > 64))
return -EINVAL;
rc = vhost_user_set_mem_table(vu_dev);
if (rc)
return rc;
@@ -1125,6 +1136,8 @@ static int virtio_uml_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, vu_dev);
device_set_wakeup_capable(&vu_dev->vdev.dev, true);
rc = register_virtio_device(&vu_dev->vdev);
if (rc)
put_device(&vu_dev->vdev.dev);
@@ -1286,6 +1299,46 @@ static const struct of_device_id virtio_uml_match[] = {
};
MODULE_DEVICE_TABLE(of, virtio_uml_match);
static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
{
struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
struct virtqueue *vq;
virtio_device_for_each_vq((&vu_dev->vdev), vq) {
struct virtio_uml_vq_info *info = vq->priv;
info->suspended = true;
vhost_user_set_vring_enable(vu_dev, vq->index, false);
}
if (!device_may_wakeup(&vu_dev->vdev.dev)) {
vu_dev->suspended = true;
return 0;
}
return irq_set_irq_wake(vu_dev->irq, 1);
}
static int virtio_uml_resume(struct platform_device *pdev)
{
struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
struct virtqueue *vq;
virtio_device_for_each_vq((&vu_dev->vdev), vq) {
struct virtio_uml_vq_info *info = vq->priv;
info->suspended = false;
vhost_user_set_vring_enable(vu_dev, vq->index, true);
}
vu_dev->suspended = false;
if (!device_may_wakeup(&vu_dev->vdev.dev))
return 0;
return irq_set_irq_wake(vu_dev->irq, 0);
}
static struct platform_driver virtio_uml_driver = {
.probe = virtio_uml_probe,
.remove = virtio_uml_remove,
@@ -1293,6 +1346,8 @@ static struct platform_driver virtio_uml_driver = {
.name = "virtio-uml",
.of_match_table = virtio_uml_match,
},
.suspend = virtio_uml_suspend,
.resume = virtio_uml_resume,
};
static int __init virtio_uml_init(void)
......
@@ -14,7 +14,6 @@ generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += mmiowb.h
generic-y += module.lds.h
generic-y += param.h
@@ -26,3 +25,4 @@ generic-y += topology.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
generic-y += kprobes.h
generic-y += mm_hooks.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_UM_IO_H
#define _ASM_UM_IO_H
#include <linux/types.h>
#define ioremap ioremap
static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
......
@@ -33,4 +33,5 @@
#define NR_IRQS 64
#include <asm-generic/irq.h>
#endif
@@ -10,33 +10,9 @@
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <asm/mm_hooks.h>
#include <asm/mmu.h>
extern void uml_setup_stubs(struct mm_struct *mm);
/*
* Needed since we do not use the asm-generic/mm_hooks.h:
*/
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
uml_setup_stubs(mm);
return 0;
}
extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
/*
* end asm-generic/mm_hooks.h functions
*/
extern void force_flush_all(void);
#define activate_mm activate_mm
@@ -47,9 +23,6 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
* when the new ->mm is used for the first time.
*/
__switch_mm(&new->context.id);
mmap_write_lock_nested(new, SINGLE_DEPTH_NESTING);
uml_setup_stubs(new);
mmap_write_unlock(new);
}
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
......
@@ -7,6 +7,7 @@
#ifndef __TIMER_INTERNAL_H__
#define __TIMER_INTERNAL_H__
#include <linux/list.h>
#include <asm/bug.h>
#define TIMER_MULTIPLIER 256
#define TIMER_MIN_DELTA 500
@@ -54,6 +55,9 @@ static inline void time_travel_wait_readable(int fd)
}
void time_travel_add_irq_event(struct time_travel_event *e);
void time_travel_add_event_rel(struct time_travel_event *e,
unsigned long long delay_ns);
bool time_travel_del_event(struct time_travel_event *e);
#else
struct time_travel_event {
};
@@ -74,6 +78,19 @@ static inline void time_travel_propagate_time(void)
static inline void time_travel_wait_readable(int fd)
{
}
static inline void time_travel_add_irq_event(struct time_travel_event *e)
{
WARN_ON(1);
}
/*
* not inlines so the data structure need not exist,
* cause linker failures
*/
extern void time_travel_not_configured(void);
#define time_travel_add_event_rel(...) time_travel_not_configured()
#define time_travel_del_event(...) time_travel_not_configured()
#endif /* CONFIG_UML_TIME_TRAVEL_SUPPORT */
/*
......
@@ -20,18 +20,10 @@
* 'UL' and other type specifiers unilaterally. We
* use the following macros to deal with this.
*/
#ifdef __ASSEMBLY__
#define _UML_AC(X, Y) (Y)
#else
#define __UML_AC(X, Y) (X(Y))
#define _UML_AC(X, Y) __UML_AC(X, Y)
#endif
#define STUB_START _UML_AC(, 0x100000)
#define STUB_CODE _UML_AC((unsigned long), STUB_START)
#define STUB_DATA _UML_AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE)
#define STUB_END _UML_AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE)
#define STUB_START stub_start
#define STUB_CODE STUB_START
#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
#define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE)
#ifndef __ASSEMBLY__
@@ -54,6 +46,7 @@ extern unsigned long long highmem;
extern unsigned long brk_start;
extern unsigned long host_task_size;
extern unsigned long stub_start;
extern int linux_main(int argc, char **argv);
extern void uml_finishsetup(void);
......
/* SPDX-License-Identifier: GPL-2.0 */
/* for use by sys-$SUBARCH/kernel-offsets.c */
#include <stub-data.h>
DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
@@ -43,3 +44,8 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
#endif
/* for stub */
DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset));
DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err));
DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));
@@ -7,6 +7,7 @@
#define __IRQ_KERN_H__
#include <linux/interrupt.h>
#include <linux/time-internal.h>
#include <asm/ptrace.h>
#include "irq_user.h"
@@ -15,5 +16,64 @@
int um_request_irq(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/**
* um_request_irq_tt - request an IRQ with timetravel handler
*
* @irq: the IRQ number, or %UM_IRQ_ALLOC
* @fd: The file descriptor to request an IRQ for
* @type: read or write
* @handler: the (generic style) IRQ handler
* @irqflags: Linux IRQ flags
* @devname: name for this to show
* @dev_id: data pointer to pass to the IRQ handler
* @timetravel_handler: the timetravel interrupt handler, invoked with the IRQ
* number, fd, dev_id and time-travel event pointer.
*
* Returns: The interrupt number assigned or a negative error.
*
* Note that the timetravel handler is invoked only if the time_travel_mode is
* %TT_MODE_EXTERNAL, and then it is invoked even while the system is suspended!
* This function must call time_travel_add_irq_event() for the event passed with
* an appropriate delay, before sending an ACK on the socket it was invoked for.
*
* If this was called while the system is suspended, then adding the event will
* cause the system to resume.
*
* Since this function will almost certainly have to handle the FD's condition,
* a read will consume the message, and after that it is up to the code using
* it to pass such a message to the @handler in whichever way it can.
*
* If time_travel_mode is not %TT_MODE_EXTERNAL the @timetravel_handler will
* not be invoked at all and the @handler must handle the FD becoming
* readable (or writable) instead. Use um_irq_timetravel_handler_used() to
* distinguish these cases.
*
* See virtio_uml.c for an example.
*/
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *));
#else
static inline
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
return um_request_irq(irq, fd, type, handler, irqflags,
devname, dev_id);
}
#endif
static inline bool um_irq_timetravel_handler_used(void)
{
return time_travel_mode == TT_MODE_EXTERNAL;
}
void um_free_irq(int irq, void *dev_id);
#endif
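
To make the contract documented above concrete, here is a minimal sketch of a driver using um_request_irq_tt(). It is illustrative only (not code from this series); my_fd, my_dev and my_consume_message() are hypothetical names:

/* Hypothetical driver sketch, not from this patch set. */
static void my_consume_message(void *dev_id)
{
	/* read my_fd here so its readable condition is cleared */
}

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/*
	 * In TT_MODE_EXTERNAL the timetravel handler below already
	 * consumed the message; in all other modes do it here.
	 */
	if (!um_irq_timetravel_handler_used())
		my_consume_message(dev_id);
	return IRQ_HANDLED;
}

static void my_tt_handler(int irq, int fd, void *dev_id,
			  struct time_travel_event *ev)
{
	/*
	 * Runs only in TT_MODE_EXTERNAL, possibly while suspended:
	 * consume the message, then add the event so the simulation
	 * calendar delivers the interrupt (and resumes the system if
	 * it was suspended), as the kernel-doc above requires.
	 */
	my_consume_message(dev_id);
	time_travel_add_irq_event(ev);
}

/* registration, e.g. from a probe function: */
err = um_request_irq_tt(UM_IRQ_ALLOC, my_fd, IRQ_READ,
			my_irq_handler, 0, "my-dev", my_dev,
			my_tt_handler);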
@@ -12,6 +12,7 @@ struct mm_id {
int pid;
} u;
unsigned long stack;
int kill;
};
#endif
@@ -11,7 +11,7 @@
struct stub_data {
unsigned long offset;
int fd;
long err;
long parent_err, child_err;
};
#endif
@@ -26,9 +26,7 @@ void flush_thread(void)
arch_flush_thread(&current->thread.arch);
ret = unmap(&current->mm->context.id, 0, STUB_START, 0, &data);
ret = ret || unmap(&current->mm->context.id, STUB_END,
host_task_size - STUB_END, 1, &data);
ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
if (ret) {
printk(KERN_ERR "flush_thread - clearing address space failed, "
"err = %d\n", ret);
......
@@ -20,7 +20,7 @@
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>
#include <as-layout.h>
#include <linux/time-internal.h>
extern void free_irqs(void);
@@ -38,6 +38,12 @@ struct irq_reg {
bool active;
bool pending;
bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
bool pending_on_resume;
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *);
struct time_travel_event event;
#endif
};
struct irq_entry {
@@ -51,6 +57,7 @@ struct irq_entry {
static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
static bool irqs_suspended;
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{
@@ -74,9 +81,65 @@ static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
}
}
void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static void irq_event_handler(struct time_travel_event *ev)
{
/* nothing */
struct irq_reg *reg = container_of(ev, struct irq_reg, event);
/* do nothing if suspended - just to cause a wakeup */
if (irqs_suspended)
return;
generic_handle_irq(reg->irq);
}
static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t)
{
struct irq_reg *reg = &entry->reg[t];
if (!reg->timetravel_handler)
return false;
/* prevent nesting - we'll get it again later when we SIGIO ourselves */
if (reg->pending_on_resume)
return true;
reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);
if (!reg->event.pending)
return false;
if (irqs_suspended)
reg->pending_on_resume = true;
return true;
}
#else
static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t)
{
return false;
}
#endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
struct uml_pt_regs *regs)
{
struct irq_reg *reg = &entry->reg[t];
if (!reg->events)
return;
if (os_epoll_triggered(idx, reg->events) <= 0)
return;
if (irq_do_timetravel_handler(entry, t))
return;
if (irqs_suspended)
return;
irq_io_loop(reg, regs);
}
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
@@ -84,6 +147,9 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
struct irq_entry *irq_entry;
int n, i;
if (irqs_suspended && !um_irq_timetravel_handler_used())
return;
while (1) {
/* This is now lockless - epoll keeps back-referencesto the irqs
* which have trigger it so there is no need to walk the irq
@@ -105,19 +171,13 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
irq_entry = os_epoll_get_data_pointer(i);
for (t = 0; t < NUM_IRQ_TYPES; t++) {
int events = irq_entry->reg[t].events;
if (!events)
continue;
if (os_epoll_triggered(i, events) > 0)
irq_io_loop(&irq_entry->reg[t], regs);
}
for (t = 0; t < NUM_IRQ_TYPES; t++)
sigio_reg_handler(i, irq_entry, t, regs);
}
}
free_irqs();
if (!irqs_suspended)
free_irqs();
}
static struct irq_entry *get_irq_entry_by_fd(int fd)
@@ -169,7 +229,9 @@ static void update_or_free_irq_entry(struct irq_entry *entry)
free_irq_entry(entry, false);
}
static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
struct irq_entry *irq_entry;
int err, events = os_event_mask(type);
@@ -206,6 +268,13 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
irq_entry->reg[type].active = true;
irq_entry->reg[type].events = events;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
if (um_irq_timetravel_handler_used()) {
irq_entry->reg[type].timetravel_handler = timetravel_handler;
irq_entry->reg[type].event.fn = irq_event_handler;
}
#endif
WARN_ON(!update_irq_entry(irq_entry));
spin_unlock_irqrestore(&irq_lock, flags);
@@ -339,9 +408,12 @@ void um_free_irq(int irq, void *dev)
}
EXPORT_SYMBOL(um_free_irq);
int um_request_irq(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id)
static int
_um_request_irq(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
int err;
@@ -360,7 +432,7 @@ int um_request_irq(int irq, int fd, enum um_irq_type type,
return -ENOSPC;
if (fd != -1) {
err = activate_fd(irq, fd, type, dev_id);
err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
if (err)
goto error;
}
@@ -374,20 +446,41 @@ int um_request_irq(int irq, int fd, enum um_irq_type type,
clear_bit(irq, irqs_allocated);
return err;
}
int um_request_irq(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id)
{
return _um_request_irq(irq, fd, type, handler, irqflags,
devname, dev_id, NULL);
}
EXPORT_SYMBOL(um_request_irq);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id,
void (*timetravel_handler)(int, int, void *,
struct time_travel_event *))
{
return _um_request_irq(irq, fd, type, handler, irqflags,
devname, dev_id, timetravel_handler);
}
EXPORT_SYMBOL(um_request_irq_tt);
#endif
#ifdef CONFIG_PM_SLEEP
void um_irqs_suspend(void)
{
struct irq_entry *entry;
unsigned long flags;
sig_info[SIGIO] = sigio_handler_suspend;
irqs_suspended = true;
spin_lock_irqsave(&irq_lock, flags);
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
bool wake = false;
bool clear = true;
for (t = 0; t < NUM_IRQ_TYPES; t++) {
if (!entry->reg[t].events)
@@ -400,13 +493,17 @@ void um_irqs_suspend(void)
* any FDs that should be suspended.
*/
if (entry->reg[t].wakeup ||
entry->reg[t].irq == SIGIO_WRITE_IRQ) {
wake = true;
entry->reg[t].irq == SIGIO_WRITE_IRQ
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
|| entry->reg[t].timetravel_handler
#endif
) {
clear = false;
break;
}
}
if (!wake) {
if (clear) {
entry->suspended = true;
os_clear_fd_async(entry->fd);
entry->sigio_workaround =
@@ -421,7 +518,31 @@ void um_irqs_resume(void)
struct irq_entry *entry;
unsigned long flags;
spin_lock_irqsave(&irq_lock, flags);
local_irq_save(flags);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
* We don't need to lock anything here since we're in resume
* and nothing else is running, but have disabled IRQs so we
* don't try anything else with the interrupt list from there.
*/
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
for (t = 0; t < NUM_IRQ_TYPES; t++) {
struct irq_reg *reg = &entry->reg[t];
if (reg->pending_on_resume) {
irq_enter();
generic_handle_irq(reg->irq);
irq_exit();
reg->pending_on_resume = false;
}
}
}
#endif
spin_lock(&irq_lock);
list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) {
int err = os_set_fd_async(entry->fd);
@@ -437,7 +558,7 @@ void um_irqs_resume(void)
}
spin_unlock_irqrestore(&irq_lock, flags);
sig_info[SIGIO] = sigio_handler;
irqs_suspended = false;
send_sigio_to_self();
}
......
@@ -24,29 +24,25 @@
void __attribute__ ((__section__ (".__syscall_stub")))
stub_clone_handler(void)
{
struct stub_data *data = (struct stub_data *) STUB_DATA;
int stack;
struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
long err;
err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
if (err != 0)
goto out;
(unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
if (err) {
data->parent_err = err;
goto done;
}
err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
if (err)
goto out;
if (err) {
data->child_err = err;
goto done;
}
remap_stack(data->fd, data->offset);
goto done;
remap_stack_and_trap();
out:
/*
* save current result.
* Parent: pid;
* child: retcode of mmap already saved and it jumps around this
* assignment
*/
data->err = err;
done:
trap_myself();
}
@@ -14,47 +14,6 @@
#include <os.h>
#include <skas.h>
static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
unsigned long kernel)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, proc);
p4d = p4d_alloc(mm, pgd, proc);
if (!p4d)
goto out;
pud = pud_alloc(mm, p4d, proc);
if (!pud)
goto out_pud;
pmd = pmd_alloc(mm, pud, proc);
if (!pmd)
goto out_pmd;
pte = pte_alloc_map(mm, pmd, proc);
if (!pte)
goto out_pte;
*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
*pte = pte_mkread(*pte);
return 0;
out_pte:
pmd_free(mm, pmd);
out_pmd:
pud_free(mm, pud);
out_pud:
p4d_free(mm, p4d);
out:
return -ENOMEM;
}
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
struct mm_context *from_mm = NULL;
@@ -98,52 +57,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
return ret;
}
void uml_setup_stubs(struct mm_struct *mm)
{
int err, ret;
ret = init_stub_pte(mm, STUB_CODE,
(unsigned long) __syscall_stub_start);
if (ret)
goto out;
ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
if (ret)
goto out;
mm->context.stub_pages[0] = virt_to_page(__syscall_stub_start);
mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack);
/* dup_mmap already holds mmap_lock */
err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
VM_READ | VM_MAYREAD | VM_EXEC |
VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
mm->context.stub_pages);
if (err) {
printk(KERN_ERR "install_special_mapping returned %d\n", err);
goto out;
}
return;
out:
force_sigsegv(SIGSEGV);
}
void arch_exit_mmap(struct mm_struct *mm)
{
pte_t *pte;
pte = virt_to_pte(mm, STUB_CODE);
if (pte != NULL)
pte_clear(mm, STUB_CODE, pte);
pte = virt_to_pte(mm, STUB_DATA);
if (pte == NULL)
return;
pte_clear(mm, STUB_DATA, pte);
}
void destroy_context(struct mm_struct *mm)
{
struct mm_context *mmu = &mm->context;
......
@@ -278,6 +278,7 @@ static void __time_travel_add_event(struct time_travel_event *e,
{
struct time_travel_event *tmp;
bool inserted = false;
unsigned long flags;
if (e->pending)
return;
@@ -285,6 +286,7 @@ static void __time_travel_add_event(struct time_travel_event *e,
e->pending = true;
e->time = time;
local_irq_save(flags);
list_for_each_entry(tmp, &time_travel_events, list) {
/*
* Add the new entry before one with higher time,
@@ -307,6 +309,7 @@ static void __time_travel_add_event(struct time_travel_event *e,
tmp = time_travel_first_event();
time_travel_ext_update_request(tmp->time);
time_travel_next_event = tmp->time;
local_irq_restore(flags);
}
static void time_travel_add_event(struct time_travel_event *e,
@@ -318,6 +321,12 @@ static void time_travel_add_event(struct time_travel_event *e,
__time_travel_add_event(e, time);
}
void time_travel_add_event_rel(struct time_travel_event *e,
unsigned long long delay_ns)
{
time_travel_add_event(e, time_travel_time + delay_ns);
}
void time_travel_periodic_timer(struct time_travel_event *e)
{
time_travel_add_event(&time_travel_timer_event,
@@ -381,12 +390,16 @@ static void time_travel_deliver_event(struct time_travel_event *e)
}
}
static bool time_travel_del_event(struct time_travel_event *e)
bool time_travel_del_event(struct time_travel_event *e)
{
unsigned long flags;
if (!e->pending)
return false;
local_irq_save(flags);
list_del(&e->list);
e->pending = false;
local_irq_restore(flags);
return true;
}
@@ -587,6 +600,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
/* these are empty macros so the struct/fn need not exist */
#define time_travel_add_event(e, time) do { } while (0)
/* externally not usable - redefine here so we can */
#undef time_travel_del_event
#define time_travel_del_event(e) do { } while (0)
#endif
......
@@ -162,9 +162,6 @@ static int add_munmap(unsigned long addr, unsigned long len,
struct host_vm_op *last;
int ret = 0;
if ((addr >= STUB_START) && (addr < STUB_END))
return -EINVAL;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MUNMAP) &&
@@ -226,9 +223,6 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
if ((addr >= STUB_START) && (addr < STUB_END))
continue;
r = pte_read(*pte);
w = pte_write(*pte);
x = pte_exec(*pte);
@@ -346,12 +340,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
/* This is not an else because ret is modified above */
if (ret) {
struct mm_id *mm_idp = &current->mm->context.id;
printk(KERN_ERR "fix_range_common: failed, killing current "
"process: %d\n", task_tgid_vnr(current));
/* We are under mmap_lock, release it such that current can terminate */
mmap_write_unlock(current->mm);
force_sig(SIGKILL);
do_signal(&current->thread.regs);
mm_idp->kill = 1;
}
}
@@ -472,6 +465,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
struct mm_id *mm_id;
address &= PAGE_MASK;
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto kill;
......
@@ -249,6 +249,7 @@ void uml_finishsetup(void)
}
/* Set during early boot */
unsigned long stub_start;
unsigned long task_size;
EXPORT_SYMBOL(task_size);
@@ -283,6 +284,10 @@ int __init linux_main(int argc, char **argv)
add_arg(DEFAULT_COMMAND_LINE_CONSOLE);
host_task_size = os_get_top_address();
/* reserve two pages for the stubs */
host_task_size -= 2 * PAGE_SIZE;
stub_start = host_task_size;
/*
* TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps
* out
......
@@ -40,6 +40,8 @@ static int __init init_syscall_regs(void)
syscall_regs[REGS_IP_INDEX] = STUB_CODE +
((unsigned long) batch_syscall_stub -
(unsigned long) __syscall_stub_start);
syscall_regs[REGS_SP_INDEX] = STUB_DATA;
return 0;
}
......
@@ -28,6 +28,54 @@ int is_skas_winch(int pid, int fd, void *data)
return pid == getpgrp();
}
static const char *ptrace_reg_name(int idx)
{
#define R(n) case HOST_##n: return #n
switch (idx) {
#ifdef __x86_64__
R(BX);
R(CX);
R(DI);
R(SI);
R(DX);
R(BP);
R(AX);
R(R8);
R(R9);
R(R10);
R(R11);
R(R12);
R(R13);
R(R14);
R(R15);
R(ORIG_AX);
R(CS);
R(SS);
R(EFLAGS);
#elif defined(__i386__)
R(IP);
R(SP);
R(EFLAGS);
R(AX);
R(BX);
R(CX);
R(DX);
R(SI);
R(DI);
R(BP);
R(CS);
R(SS);
R(DS);
R(FS);
R(ES);
R(GS);
R(ORIG_AX);
#endif
}
return "";
}
static int ptrace_dump_regs(int pid)
{
unsigned long regs[MAX_REG_NR];
@@ -37,8 +85,11 @@ static int ptrace_dump_regs(int pid)
return -errno;
printk(UM_KERN_ERR "Stub registers -\n");
for (i = 0; i < ARRAY_SIZE(regs); i++)
printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
const char *regname = ptrace_reg_name(i);
printk(UM_KERN_ERR "\t%s\t(%2d): %lx\n", regname, i, regs[i]);
}
return 0;
}
@@ -200,10 +251,6 @@ static int userspace_tramp(void *stack)
signal(SIGTERM, SIG_DFL);
signal(SIGWINCH, SIG_IGN);
/*
* This has a pte, but it can't be mapped in with the usual
* tlb_flush mechanism because this is part of that mechanism
*/
fd = phys_mapping(to_phys(__syscall_stub_start), &offset);
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
@@ -249,6 +296,7 @@ static int userspace_tramp(void *stack)
}
int userspace_pid[NR_CPUS];
int kill_userspace_mm[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
@@ -342,6 +390,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
interrupt_end();
while (1) {
if (kill_userspace_mm[0])
fatal_sigsegv();
/*
* This can legitimately fail if the process loads a
@@ -491,8 +541,14 @@ int copy_context_skas0(unsigned long new_stack, int pid)
* and child's mmap2 calls
*/
*data = ((struct stub_data) {
.offset = MMAP_OFFSET(new_offset),
.fd = new_fd
.offset = MMAP_OFFSET(new_offset),
.fd = new_fd,
.parent_err = -ESRCH,
.child_err = 0,
});
*child_data = ((struct stub_data) {
.child_err = -ESRCH,
});
err = ptrace_setregs(pid, thread_regs);
@@ -510,9 +566,6 @@ int copy_context_skas0(unsigned long new_stack, int pid)
return err;
}
/* set a well known return code for detection of child write failure */
child_data->err = 12345678;
/*
* Wait, until parent has finished its work: read child's pid from
* parent's stack, and check, if bad result.
@@ -527,7 +580,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
wait_stub_done(pid);
pid = data->err;
pid = data->parent_err;
if (pid < 0) {
printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports "
"error %d\n", -pid);
@@ -539,10 +592,10 @@ int copy_context_skas0(unsigned long new_stack, int pid)
* child's stack and check it.
*/
wait_stub_done(pid);
if (child_data->err != STUB_DATA) {
printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports "
"error %ld\n", child_data->err);
err = child_data->err;
if (child_data->child_err != STUB_DATA) {
printk(UM_KERN_ERR "copy_context_skas0 - stub-child %d reports "
"error %ld\n", pid, data->child_err);
err = data->child_err;
goto out_kill;
}
@@ -663,4 +716,5 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp)
{
userspace_pid[0] = mm_idp->u.pid;
kill_userspace_mm[0] = mm_idp->kill;
}
@@ -145,7 +145,7 @@ unsigned long os_get_top_address(void)
unsigned long os_get_top_address(void)
{
/* The old value of CONFIG_TOP_ADDR */
return 0x7fc0000000;
return 0x7fc0002000;
}
#endif
@@ -7,8 +7,8 @@
#define __SYSDEP_STUB_H
#include <asm/ptrace.h>
#include <generated/asm-offsets.h>
#define STUB_SYSCALL_RET EAX
#define STUB_MMAP_NR __NR_mmap2
#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
@@ -77,17 +77,28 @@ static inline void trap_myself(void)
__asm("int3");
}
static inline void remap_stack(int fd, unsigned long offset)
static void inline remap_stack_and_trap(void)
{
__asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;"
"movl %7, %%ebx ; movl %%eax, (%%ebx)"
: : "g" (STUB_MMAP_NR), "b" (STUB_DATA),
"c" (UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE),
"S" (MAP_FIXED | MAP_SHARED), "D" (fd),
"a" (offset),
"i" (&((struct stub_data *) STUB_DATA)->err)
: "memory");
__asm__ volatile (
"movl %%esp,%%ebx ;"
"andl %0,%%ebx ;"
"movl %1,%%eax ;"
"movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;"
"movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;"
"int $0x80 ;"
"addl %4,%%ebx ; movl %%eax, (%%ebx) ;"
"int $3"
: :
"g" (~(UM_KERN_PAGE_SIZE - 1)),
"g" (STUB_MMAP_NR),
"g" (UML_STUB_FIELD_FD),
"g" (UML_STUB_FIELD_OFFSET),
"g" (UML_STUB_FIELD_CHILD_ERR),
"c" (UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE),
"S" (MAP_FIXED | MAP_SHARED)
:
"memory");
}
#endif
@@ -7,8 +7,8 @@
#define __SYSDEP_STUB_H
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#define STUB_SYSCALL_RET PT_INDEX(RAX)
#define STUB_MMAP_NR __NR_mmap
#define MMAP_OFFSET(o) (o)
@@ -82,18 +82,30 @@ static inline void trap_myself(void)
__asm("int3");
}
static inline void remap_stack(long fd, unsigned long offset)
static inline void remap_stack_and_trap(void)
{
__asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; "
"movq %6, %%r9; " __syscall "; movq %7, %%rbx ; "
"movq %%rax, (%%rbx)":
: "a" (STUB_MMAP_NR), "D" (STUB_DATA),
"S" (UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE),
"g" (MAP_FIXED | MAP_SHARED), "g" (fd),
"g" (offset),
"i" (&((struct stub_data *) STUB_DATA)->err)
: __syscall_clobber, "r10", "r8", "r9" );
__asm__ volatile (
"movq %0,%%rax ;"
"movq %%rsp,%%rdi ;"
"andq %1,%%rdi ;"
"movq %2,%%r10 ;"
"movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;"
"movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;"
__syscall ";"
"movq %%rsp,%%rdi ; andq %1,%%rdi ;"
"addq %5,%%rdi ; movq %%rax, (%%rdi) ;"
"int3"
: :
"g" (STUB_MMAP_NR),
"g" (~(UM_KERN_PAGE_SIZE - 1)),
"g" (MAP_FIXED | MAP_SHARED),
"g" (UML_STUB_FIELD_FD),
"g" (UML_STUB_FIELD_OFFSET),
"g" (UML_STUB_FIELD_CHILD_ERR),
"S" (UM_KERN_PAGE_SIZE),
"d" (PROT_READ | PROT_WRITE)
:
__syscall_clobber, "r10", "r8", "r9");
}
#endif
@@ -5,21 +5,22 @@
.globl batch_syscall_stub
batch_syscall_stub:
/* load pointer to first operation */
mov $(STUB_DATA+8), %esp
/* %esp comes in as "top of page" */
mov %esp, %ecx
/* %esp has pointer to first operation */
add $8, %esp
again:
/* load length of additional data */
mov 0x0(%esp), %eax
/* if(length == 0) : end of list */
/* write possible 0 to header */
mov %eax, STUB_DATA+4
mov %eax, 0x4(%ecx)
cmpl $0, %eax
jz done
/* save current pointer */
mov %esp, STUB_DATA+4
mov %esp, 0x4(%ecx)
/* skip additional data */
add %eax, %esp
@@ -38,6 +39,10 @@ again:
/* execute syscall */
int $0x80
/* restore top of page pointer in %ecx */
mov %esp, %ecx
andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx
/* check return value */
pop %ebx
cmp %ebx, %eax
@@ -45,7 +50,7 @@
done:
/* save return value */
mov %eax, STUB_DATA
mov %eax, (%ecx)
/* stop */
int3
@@ -4,9 +4,8 @@
.section .__syscall_stub, "ax"
.globl batch_syscall_stub
batch_syscall_stub:
mov $(STUB_DATA), %rbx
/* load pointer to first operation */
mov %rbx, %rsp
/* %rsp has the pointer to first operation */
mov %rsp, %rbx
add $0x10, %rsp
again:
/* load length of additional data */
......
@@ -11,10 +11,11 @@
void __attribute__ ((__section__ (".__syscall_stub")))
stub_segv_handler(int sig, siginfo_t *info, void *p)
{
int stack;
ucontext_t *uc = p;
struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1));
GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
&uc->uc_mcontext);
GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext);
trap_myself();
}
@@ -34,6 +34,8 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
static struct kmem_cache *hostfs_inode_cache;
/* Changed in hostfs_args before the kernel starts running */
static char *root_ino = "";
static int append = 0;
@@ -221,7 +223,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
{
struct hostfs_inode_info *hi;
hi = kmalloc(sizeof(*hi), GFP_KERNEL_ACCOUNT);
hi = kmem_cache_alloc(hostfs_inode_cache, GFP_KERNEL_ACCOUNT);
if (hi == NULL)
return NULL;
hi->fd = -1;
@@ -243,7 +245,7 @@ static void hostfs_evict_inode(struct inode *inode)
static void hostfs_free_inode(struct inode *inode)
{
kfree(HOSTFS_I(inode));
kmem_cache_free(hostfs_inode_cache, HOSTFS_I(inode));
}
static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
@@ -986,12 +988,16 @@ MODULE_ALIAS_FS("hostfs");
static int __init init_hostfs(void)
{
hostfs_inode_cache = KMEM_CACHE(hostfs_inode_info, 0);
if (!hostfs_inode_cache)
return -ENOMEM;
return register_filesystem(&hostfs_type);
}
static void __exit exit_hostfs(void)
{
unregister_filesystem(&hostfs_type);
kmem_cache_destroy(hostfs_inode_cache);
}
module_init(init_hostfs)
......
@@ -35,7 +35,6 @@ mandatory-y += kprobes.h
mandatory-y += linkage.h
mandatory-y += local.h
mandatory-y += local64.h
mandatory-y += mm-arch-hooks.h
mandatory-y += mmiowb.h
mandatory-y += mmu.h
mandatory-y += mmu_context.h
......
/*
* Architecture specific mm hooks
*/
#ifndef _ASM_GENERIC_MM_ARCH_HOOKS_H
#define _ASM_GENERIC_MM_ARCH_HOOKS_H
/*
* This file should be included through arch/../include/asm/Kbuild for
* the architecture which doesn't need specific mm hooks.
*
* In that case, the generic hooks defined in include/linux/mm-arch-hooks.h
* are used.
*/
#endif /* _ASM_GENERIC_MM_ARCH_HOOKS_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Generic mm no-op hooks.
*
* Copyright (C) 2015, IBM Corporation
* Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
*/
#ifndef _LINUX_MM_ARCH_HOOKS_H
#define _LINUX_MM_ARCH_HOOKS_H
#include <asm/mm-arch-hooks.h>
#ifndef arch_remap
static inline void arch_remap(struct mm_struct *mm,
unsigned long old_start, unsigned long old_end,
unsigned long new_start, unsigned long new_end)
{
}
#define arch_remap arch_remap
#endif
#endif /* _LINUX_MM_ARCH_HOOKS_H */
@@ -22,7 +22,6 @@
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
@@ -563,8 +562,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
new_addr = err;
} else {
mremap_userfaultfd_prep(new_vma, uf);
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
}
/* Conceal VM_ACCOUNT so old reservation is not undone */
......