Commit 12cc5240 authored by Linus Torvalds

Merge tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Removal of dead code (TT mode leftovers, etc)

 - Fixes for the network vector driver

 - Fixes for time-travel mode

* tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: fix time-travel syscall scheduling hack
  um: Remove outdated asm/sysrq.h header
  um: Remove the declaration of user_thread function
  um: Remove the call to SUBARCH_EXECVE1 macro
  um: Remove unused mm_fd field from mm_id
  um: Remove unused fields from thread_struct
  um: Remove the redundant newpage check in update_pte_range
  um: Remove unused kpte_clear_flush macro
  um: Remove obsoleted declaration for execute_syscall_skas
  user_mode_linux_howto_v2: add VDE vector support in doc
  vector_user: add VDE support
  um: remove ARCH_NO_PREEMPT_DYNAMIC
  um: vector: Fix NAPI budget handling
  um: vector: Replace locks guarding queue depth with atomics
  um: remove variable stack array in os_rcv_fd_msg()
parents 0c33037c 381d2f95
...@@ -217,6 +217,8 @@ remote UML and other VM instances. ...@@ -217,6 +217,8 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| fd | vector | dependent on fd type | varies | | fd | vector | dependent on fd type | varies |
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| vde | vector | dep. on VDE VNL: Virt.Net Locator | varies |
+-----------+--------+------------------------------------+------------+
| tuntap | legacy | none | ~ 500Mbit | | tuntap | legacy | none | ~ 500Mbit |
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| daemon | legacy | none | ~ 450Mbit | | daemon | legacy | none | ~ 450Mbit |
...@@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports ...@@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports
BESS transport does not require any special privileges. BESS transport does not require any special privileges.
VDE vector transport
--------------------
Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide
highly flexible support for virtual networking.
http://wiki.virtualsquare.org/#/tutorials/vdebasics
Common usages of VDE include fast prototyping and teaching.
Examples:
``vecX:transport=vde,vnl=tap://tap0``
use tap0
``vecX:transport=vde,vnl=slirp://``
use slirp
``vec0:transport=vde,vnl=vde:///tmp/switch``
connect to a vde switch
``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"``
connect to a remote slirp (instant VPN: convert ssh to VPN, it uses sshlirp)
https://github.com/virtualsquare/sshlirp
``vec0:transport=vde,vnl=vxvde://234.0.0.1``
connect to a local area cloud (all the UML nodes using the same
multicast address running on hosts in the same multicast domain (LAN)
will be automagically connected together to a virtual LAN).
Configuring Legacy transports Configuring Legacy transports
============================= =============================
......
...@@ -11,7 +11,6 @@ config UML ...@@ -11,7 +11,6 @@ config UML
select ARCH_HAS_KCOV select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER select ARCH_HAS_STRNLEN_USER
select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/firmware.h> #include <linux/firmware.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <asm/atomic.h>
#include <uapi/linux/filter.h> #include <uapi/linux/filter.h>
#include <init.h> #include <init.h>
#include <irq_kern.h> #include <irq_kern.h>
...@@ -102,18 +103,33 @@ static const struct { ...@@ -102,18 +103,33 @@ static const struct {
static void vector_reset_stats(struct vector_private *vp) static void vector_reset_stats(struct vector_private *vp)
{ {
/* We reuse the existing queue locks for stats */
/* RX stats are modified with RX head_lock held
* in vector_poll.
*/
spin_lock(&vp->rx_queue->head_lock);
vp->estats.rx_queue_max = 0; vp->estats.rx_queue_max = 0;
vp->estats.rx_queue_running_average = 0; vp->estats.rx_queue_running_average = 0;
vp->estats.tx_queue_max = 0;
vp->estats.tx_queue_running_average = 0;
vp->estats.rx_encaps_errors = 0; vp->estats.rx_encaps_errors = 0;
vp->estats.sg_ok = 0;
vp->estats.sg_linearized = 0;
spin_unlock(&vp->rx_queue->head_lock);
/* TX stats are modified with TX head_lock held
* in vector_send.
*/
spin_lock(&vp->tx_queue->head_lock);
vp->estats.tx_timeout_count = 0; vp->estats.tx_timeout_count = 0;
vp->estats.tx_restart_queue = 0; vp->estats.tx_restart_queue = 0;
vp->estats.tx_kicks = 0; vp->estats.tx_kicks = 0;
vp->estats.tx_flow_control_xon = 0; vp->estats.tx_flow_control_xon = 0;
vp->estats.tx_flow_control_xoff = 0; vp->estats.tx_flow_control_xoff = 0;
vp->estats.sg_ok = 0; vp->estats.tx_queue_max = 0;
vp->estats.sg_linearized = 0; vp->estats.tx_queue_running_average = 0;
spin_unlock(&vp->tx_queue->head_lock);
} }
static int get_mtu(struct arglist *def) static int get_mtu(struct arglist *def)
...@@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def) ...@@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def)
static char *drop_buffer; static char *drop_buffer;
/* Array backed queues optimized for bulk enqueue/dequeue and
* 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
* For more details and full design rationale see
* http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
*/
/* /*
* Advance the mmsg queue head by n = advance. Resets the queue to * Advance the mmsg queue head by n = advance. Resets the queue to
...@@ -247,27 +257,13 @@ static char *drop_buffer; ...@@ -247,27 +257,13 @@ static char *drop_buffer;
static int vector_advancehead(struct vector_queue *qi, int advance) static int vector_advancehead(struct vector_queue *qi, int advance)
{ {
int queue_depth;
qi->head = qi->head =
(qi->head + advance) (qi->head + advance)
% qi->max_depth; % qi->max_depth;
spin_lock(&qi->tail_lock); atomic_sub(advance, &qi->queue_depth);
qi->queue_depth -= advance; return atomic_read(&qi->queue_depth);
/* we are at 0, use this to
* reset head and tail so we can use max size vectors
*/
if (qi->queue_depth == 0) {
qi->head = 0;
qi->tail = 0;
}
queue_depth = qi->queue_depth;
spin_unlock(&qi->tail_lock);
return queue_depth;
} }
/* Advance the queue tail by n = advance. /* Advance the queue tail by n = advance.
...@@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance) ...@@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance)
static int vector_advancetail(struct vector_queue *qi, int advance) static int vector_advancetail(struct vector_queue *qi, int advance)
{ {
int queue_depth;
qi->tail = qi->tail =
(qi->tail + advance) (qi->tail + advance)
% qi->max_depth; % qi->max_depth;
spin_lock(&qi->head_lock); atomic_add(advance, &qi->queue_depth);
qi->queue_depth += advance; return atomic_read(&qi->queue_depth);
queue_depth = qi->queue_depth;
spin_unlock(&qi->head_lock);
return queue_depth;
} }
static int prep_msg(struct vector_private *vp, static int prep_msg(struct vector_private *vp,
...@@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) ...@@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
int iov_count; int iov_count;
spin_lock(&qi->tail_lock); spin_lock(&qi->tail_lock);
spin_lock(&qi->head_lock); queue_depth = atomic_read(&qi->queue_depth);
queue_depth = qi->queue_depth;
spin_unlock(&qi->head_lock);
if (skb) if (skb)
packet_len = skb->len; packet_len = skb->len;
...@@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb) ...@@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
mmsg_vector->msg_hdr.msg_iovlen = iov_count; mmsg_vector->msg_hdr.msg_iovlen = iov_count;
mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr; mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size; mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
wmb(); /* Make the packet visible to the NAPI poll thread */
queue_depth = vector_advancetail(qi, 1); queue_depth = vector_advancetail(qi, 1);
} else } else
goto drop; goto drop;
...@@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count) ...@@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count)
} }
/* /*
* Generic vector deque via sendmmsg with support for forming headers * Generic vector dequeue via sendmmsg with support for forming headers
* using transport specific callback. Allows GRE, L2TPv3, RAW and * using transport specific callback. Allows GRE, L2TPv3, RAW and
* other transports to use a common dequeue procedure in vector mode * other transports to use a common dequeue procedure in vector mode
*/ */
...@@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi) ...@@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi)
{ {
struct vector_private *vp = netdev_priv(qi->dev); struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *send_from; struct mmsghdr *send_from;
int result = 0, send_len, queue_depth = qi->max_depth; int result = 0, send_len;
if (spin_trylock(&qi->head_lock)) { if (spin_trylock(&qi->head_lock)) {
if (spin_trylock(&qi->tail_lock)) { /* update queue_depth to current value */
/* update queue_depth to current value */ while (atomic_read(&qi->queue_depth) > 0) {
queue_depth = qi->queue_depth; /* Calculate the start of the vector */
spin_unlock(&qi->tail_lock); send_len = atomic_read(&qi->queue_depth);
while (queue_depth > 0) { send_from = qi->mmsg_vector;
/* Calculate the start of the vector */ send_from += qi->head;
send_len = queue_depth; /* Adjust vector size if wraparound */
send_from = qi->mmsg_vector; if (send_len + qi->head > qi->max_depth)
send_from += qi->head; send_len = qi->max_depth - qi->head;
/* Adjust vector size if wraparound */ /* Try to TX as many packets as possible */
if (send_len + qi->head > qi->max_depth) if (send_len > 0) {
send_len = qi->max_depth - qi->head; result = uml_vector_sendmmsg(
/* Try to TX as many packets as possible */ vp->fds->tx_fd,
if (send_len > 0) { send_from,
result = uml_vector_sendmmsg( send_len,
vp->fds->tx_fd, 0
send_from, );
send_len, vp->in_write_poll =
0 (result != send_len);
); }
vp->in_write_poll = /* For some of the sendmmsg error scenarios
(result != send_len); * we may end being unsure in the TX success
} * for all packets. It is safer to declare
/* For some of the sendmmsg error scenarios * them all TX-ed and blame the network.
* we may end being unsure in the TX success */
* for all packets. It is safer to declare if (result < 0) {
* them all TX-ed and blame the network. if (net_ratelimit())
*/ netdev_err(vp->dev, "sendmmsg err=%i\n",
if (result < 0) { result);
if (net_ratelimit()) vp->in_error = true;
netdev_err(vp->dev, "sendmmsg err=%i\n", result = send_len;
result); }
vp->in_error = true; if (result > 0) {
result = send_len; consume_vector_skbs(qi, result);
} /* This is equivalent to an TX IRQ.
if (result > 0) { * Restart the upper layers to feed us
queue_depth = * more packets.
consume_vector_skbs(qi, result);
/* This is equivalent to an TX IRQ.
* Restart the upper layers to feed us
* more packets.
*/
if (result > vp->estats.tx_queue_max)
vp->estats.tx_queue_max = result;
vp->estats.tx_queue_running_average =
(vp->estats.tx_queue_running_average + result) >> 1;
}
netif_wake_queue(qi->dev);
/* if TX is busy, break out of the send loop,
* poll write IRQ will reschedule xmit for us
*/ */
if (result != send_len) { if (result > vp->estats.tx_queue_max)
vp->estats.tx_restart_queue++; vp->estats.tx_queue_max = result;
break; vp->estats.tx_queue_running_average =
} (vp->estats.tx_queue_running_average + result) >> 1;
}
netif_wake_queue(qi->dev);
/* if TX is busy, break out of the send loop,
* poll write IRQ will reschedule xmit for us.
*/
if (result != send_len) {
vp->estats.tx_restart_queue++;
break;
} }
} }
spin_unlock(&qi->head_lock); spin_unlock(&qi->head_lock);
} }
return queue_depth; return atomic_read(&qi->queue_depth);
} }
/* Queue destructor. Deliberately stateless so we can use /* Queue destructor. Deliberately stateless so we can use
...@@ -589,7 +574,7 @@ static struct vector_queue *create_queue( ...@@ -589,7 +574,7 @@ static struct vector_queue *create_queue(
} }
spin_lock_init(&result->head_lock); spin_lock_init(&result->head_lock);
spin_lock_init(&result->tail_lock); spin_lock_init(&result->tail_lock);
result->queue_depth = 0; atomic_set(&result->queue_depth, 0);
result->head = 0; result->head = 0;
result->tail = 0; result->tail = 0;
return result; return result;
...@@ -668,18 +653,27 @@ static struct sk_buff *prep_skb( ...@@ -668,18 +653,27 @@ static struct sk_buff *prep_skb(
} }
/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/ /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */
static void prep_queue_for_rx(struct vector_queue *qi) static void prep_queue_for_rx(struct vector_queue *qi)
{ {
struct vector_private *vp = netdev_priv(qi->dev); struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *mmsg_vector = qi->mmsg_vector; struct mmsghdr *mmsg_vector = qi->mmsg_vector;
void **skbuff_vector = qi->skbuff_vector; void **skbuff_vector = qi->skbuff_vector;
int i; int i, queue_depth;
queue_depth = atomic_read(&qi->queue_depth);
if (qi->queue_depth == 0) if (queue_depth == 0)
return; return;
for (i = 0; i < qi->queue_depth; i++) {
/* RX is always emptied 100% during each cycle, so we do not
* have to do the tail wraparound math for it.
*/
qi->head = qi->tail = 0;
for (i = 0; i < queue_depth; i++) {
/* it is OK if allocation fails - recvmmsg with NULL data in /* it is OK if allocation fails - recvmmsg with NULL data in
* iov argument still performs an RX, just drops the packet * iov argument still performs an RX, just drops the packet
* This allows us stop faffing around with a "drop buffer" * This allows us stop faffing around with a "drop buffer"
...@@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi) ...@@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
skbuff_vector++; skbuff_vector++;
mmsg_vector++; mmsg_vector++;
} }
qi->queue_depth = 0; atomic_set(&qi->queue_depth, 0);
} }
static struct vector_device *find_device(int n) static struct vector_device *find_device(int n)
...@@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) ...@@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
budget = qi->max_depth; budget = qi->max_depth;
packet_count = uml_vector_recvmmsg( packet_count = uml_vector_recvmmsg(
vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); vp->fds->rx_fd, qi->mmsg_vector, budget, 0);
if (packet_count < 0) if (packet_count < 0)
vp->in_error = true; vp->in_error = true;
...@@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget) ...@@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
* many do we need to prep the next time prep_queue_for_rx() is called. * many do we need to prep the next time prep_queue_for_rx() is called.
*/ */
qi->queue_depth = packet_count; atomic_add(packet_count, &qi->queue_depth);
for (i = 0; i < packet_count; i++) { for (i = 0; i < packet_count; i++) {
skb = (*skbuff_vector); skb = (*skbuff_vector);
...@@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget) ...@@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget)
if ((vp->options & VECTOR_TX) != 0) if ((vp->options & VECTOR_TX) != 0)
tx_enqueued = (vector_send(vp->tx_queue) > 0); tx_enqueued = (vector_send(vp->tx_queue) > 0);
spin_lock(&vp->rx_queue->head_lock);
if ((vp->options & VECTOR_RX) > 0) if ((vp->options & VECTOR_RX) > 0)
err = vector_mmsg_rx(vp, budget); err = vector_mmsg_rx(vp, budget);
else { else {
...@@ -1179,12 +1174,13 @@ static int vector_poll(struct napi_struct *napi, int budget) ...@@ -1179,12 +1174,13 @@ static int vector_poll(struct napi_struct *napi, int budget)
if (err > 0) if (err > 0)
err = 1; err = 1;
} }
spin_unlock(&vp->rx_queue->head_lock);
if (err > 0) if (err > 0)
work_done += err; work_done += err;
if (tx_enqueued || err > 0) if (tx_enqueued || err > 0)
napi_schedule(napi); napi_schedule(napi);
if (work_done < budget) if (work_done <= budget)
napi_complete_done(napi, work_done); napi_complete_done(napi, work_done);
return work_done; return work_done;
} }
...@@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev) ...@@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev)
vp->rx_header_size, vp->rx_header_size,
MAX_IOV_SIZE MAX_IOV_SIZE
); );
vp->rx_queue->queue_depth = get_depth(vp->parsed); atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed));
} else { } else {
vp->header_rxbuffer = kmalloc( vp->header_rxbuffer = kmalloc(
vp->rx_header_size, vp->rx_header_size,
...@@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev, ...@@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev,
{ {
struct vector_private *vp = netdev_priv(dev); struct vector_private *vp = netdev_priv(dev);
/* Stats are modified in the dequeue portions of
* rx/tx which are protected by the head locks
* grabbing these locks here ensures they are up
* to date.
*/
spin_lock(&vp->tx_queue->head_lock);
spin_lock(&vp->rx_queue->head_lock);
memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats)); memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
spin_unlock(&vp->rx_queue->head_lock);
spin_unlock(&vp->tx_queue->head_lock);
} }
static int vector_get_coalesce(struct net_device *netdev, static int vector_get_coalesce(struct net_device *netdev,
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <asm/atomic.h>
#include "vector_user.h" #include "vector_user.h"
...@@ -44,7 +45,8 @@ struct vector_queue { ...@@ -44,7 +45,8 @@ struct vector_queue {
struct net_device *dev; struct net_device *dev;
spinlock_t head_lock; spinlock_t head_lock;
spinlock_t tail_lock; spinlock_t tail_lock;
int queue_depth, head, tail, max_depth, max_iov_frags; atomic_t queue_depth;
int head, tail, max_depth, max_iov_frags;
short options; short options;
}; };
......
...@@ -46,6 +46,9 @@ ...@@ -46,6 +46,9 @@
#define TRANS_FD "fd" #define TRANS_FD "fd"
#define TRANS_FD_LEN strlen(TRANS_FD) #define TRANS_FD_LEN strlen(TRANS_FD)
#define TRANS_VDE "vde"
#define TRANS_VDE_LEN strlen(TRANS_VDE)
#define VNET_HDR_FAIL "could not enable vnet headers on fd %d" #define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s" #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i" #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
...@@ -434,6 +437,84 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec) ...@@ -434,6 +437,84 @@ static struct vector_fds *user_init_fd_fds(struct arglist *ifspec)
return NULL; return NULL;
} }
/*
 * Conservative character counts for printing an integer of the given type:
 * ENOUGH() allows roughly one decimal digit per 3 value bits plus 2 spare
 * characters (12 for a 32-bit int); ENOUGH_OCTAL() is the number of octal
 * digits, i.e. the bit width divided by 3, rounded up.
 */
#define ENOUGH(type) ((sizeof(type) * CHAR_BIT - 1) / 3 + 2)
#define ENOUGH_OCTAL(type) ((sizeof(type) * CHAR_BIT + 2) / 3)

/* Scheme prefix of the VNL naming the socket handed to vde_plug */
#define VDE_SEQPACKET_HEAD "seqpacket://"
/* Length of the prefix without its terminating NUL */
#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1)

/* Value passed with --descr when the user supplies none */
#define VDE_DEFAULT_DESCRIPTION "UML"

/*
 * Longest helper command line, counting the terminating NULL slot:
 * vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL)
 */
#define VDE_MAX_ARGC 12
/*
 * Open a VDE (Virtual Distributed Ethernet) vector transport.
 *
 * A SOCK_SEQPACKET AF_UNIX socket pair is created. sv[0] stays with UML
 * and is used for both RX and TX; sv[1] is advertised to a "vde_plug"
 * helper as "seqpacket://<fd>", followed by the user supplied "vnl"
 * argument.
 *
 * ifspec keys read here: vnl, descr (defaults to VDE_DEFAULT_DESCRIPTION),
 * port, mode and group. The last three are forwarded as --port2, --mod2
 * and --group2 only when present.
 *
 * Returns a newly allocated struct vector_fds on success. Returns NULL if
 * the socket pair cannot be created or configured, if the helper cannot be
 * started, or if the allocation fails; every descriptor opened here is
 * closed on those paths.
 */
static struct vector_fds *user_init_vde_fds(struct arglist *ifspec)
{
	char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1];
	char *argv[VDE_MAX_ARGC] = {"vde_plug"};
	int argc = 1;
	int rv;
	int sv[2];
	struct vector_fds *result = NULL;

	char *vnl = uml_vector_fetch_arg(ifspec, "vnl");
	char *descr = uml_vector_fetch_arg(ifspec, "descr");
	char *port = uml_vector_fetch_arg(ifspec, "port");
	char *mode = uml_vector_fetch_arg(ifspec, "mode");
	char *group = uml_vector_fetch_arg(ifspec, "group");

	if (descr == NULL)
		descr = VDE_DEFAULT_DESCRIPTION;

	argv[argc++] = "--descr";
	argv[argc++] = descr;
	if (port != NULL) {
		argv[argc++] = "--port2";
		argv[argc++] = port;
	}
	if (mode != NULL) {
		argv[argc++] = "--mod2";
		argv[argc++] = mode;
	}
	if (group != NULL) {
		argv[argc++] = "--group2";
		argv[argc++] = group;
	}
	/* seqpacketvnl is filled in below, once the socket pair exists */
	argv[argc++] = seqpacketvnl;
	/*
	 * NOTE(review): when "vnl" is absent this stores NULL and so ends
	 * argv right after the seqpacket endpoint - confirm that running
	 * vde_plug with a single endpoint is the intended fallback.
	 */
	argv[argc++] = vnl;
	argv[argc++] = NULL;

	rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
	if (rv < 0) {
		printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno);
		return NULL;
	}

	/* Only sv[1] may leak into the helper; keep our end close-on-exec */
	rv = os_set_exec_close(sv[0]);
	if (rv < 0) {
		printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno);
		goto vde_cleanup_sv;
	}

	snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]);

	rv = run_helper(NULL, NULL, argv);
	/* The helper owns its copy of sv[1] now (or never started) */
	close(sv[1]);
	if (rv < 0) {
		/* No helper on the far end: do not hand back a dead socket */
		printk(UM_KERN_ERR "vde: could not run vde_plug, err %d", rv);
		goto vde_cleanup;
	}

	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
	if (result == NULL) {
		printk(UM_KERN_ERR "vde: allocation failed");
		goto vde_cleanup;
	}

	result->rx_fd = sv[0];
	result->tx_fd = sv[0];
	/* Connected socket pair: no remote address is needed */
	result->remote_addr_size = 0;
	result->remote_addr = NULL;
	return result;

vde_cleanup_sv:
	close(sv[1]);
vde_cleanup:
	close(sv[0]);
	return NULL;
}
static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
{ {
int rxfd = -1, txfd = -1; int rxfd = -1, txfd = -1;
...@@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open( ...@@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open(
return user_init_unix_fds(parsed, ID_BESS); return user_init_unix_fds(parsed, ID_BESS);
if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0) if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
return user_init_fd_fds(parsed); return user_init_fd_fds(parsed);
if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0)
return user_init_vde_fds(parsed);
return NULL; return NULL;
} }
......
...@@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) ...@@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte; return pte;
} }
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
__flush_tlb_one((vaddr)); \
} while (0)
#endif #endif
...@@ -28,20 +28,10 @@ struct thread_struct { ...@@ -28,20 +28,10 @@ struct thread_struct {
struct arch_thread arch; struct arch_thread arch;
jmp_buf switch_buf; jmp_buf switch_buf;
struct { struct {
int op; struct {
union { int (*proc)(void *);
struct { void *arg;
int pid; } thread;
} fork, exec;
struct {
int (*proc)(void *);
void *arg;
} thread;
struct {
void (*proc)(void *);
void *arg;
} cb;
} u;
} request; } request;
}; };
...@@ -51,7 +41,7 @@ struct thread_struct { ...@@ -51,7 +41,7 @@ struct thread_struct {
.fault_addr = NULL, \ .fault_addr = NULL, \
.prev_sched = NULL, \ .prev_sched = NULL, \
.arch = INIT_ARCH_THREAD, \ .arch = INIT_ARCH_THREAD, \
.request = { 0 } \ .request = { } \
} }
/* /*
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_SYSRQ_H
#define __UM_SYSRQ_H

/* Forward declaration: only a pointer to the task is used below */
struct task_struct;

/*
 * Defined outside this header; presumably dumps the call trace found on
 * @stack for @task - confirm against the definition.
 */
void show_trace(struct task_struct *task, unsigned long *stack);

#endif /* __UM_SYSRQ_H */
...@@ -7,10 +7,7 @@ ...@@ -7,10 +7,7 @@
#define __MM_ID_H #define __MM_ID_H
struct mm_id { struct mm_id {
union { int pid;
int mm_fd;
int pid;
} u;
unsigned long stack; unsigned long stack;
int syscall_data_len; int syscall_data_len;
}; };
......
...@@ -10,10 +10,8 @@ ...@@ -10,10 +10,8 @@
extern int userspace_pid[]; extern int userspace_pid[];
extern int user_thread(unsigned long stack, int flags);
extern void new_thread_handler(void); extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs); extern void handle_syscall(struct uml_pt_regs *regs);
extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void); extern unsigned long current_stub_stack(void);
extern struct mm_id *current_mm_id(void); extern struct mm_id *current_mm_id(void);
extern void current_mm_sync(void); extern void current_mm_sync(void);
......
...@@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) ...@@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
PT_REGS_IP(regs) = eip; PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp; PT_REGS_SP(regs) = esp;
clear_thread_flag(TIF_SINGLESTEP); clear_thread_flag(TIF_SINGLESTEP);
#ifdef SUBARCH_EXECVE1
SUBARCH_EXECVE1(regs->regs);
#endif
} }
EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(start_thread);
...@@ -109,8 +109,8 @@ void new_thread_handler(void) ...@@ -109,8 +109,8 @@ void new_thread_handler(void)
schedule_tail(current->thread.prev_sched); schedule_tail(current->thread.prev_sched);
current->thread.prev_sched = NULL; current->thread.prev_sched = NULL;
fn = current->thread.request.u.thread.proc; fn = current->thread.request.thread.proc;
arg = current->thread.request.u.thread.arg; arg = current->thread.request.thread.arg;
/* /*
* callback returns only if the kernel thread execs a process * callback returns only if the kernel thread execs a process
...@@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args) ...@@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
arch_copy_thread(&current->thread.arch, &p->thread.arch); arch_copy_thread(&current->thread.arch, &p->thread.arch);
} else { } else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
p->thread.request.u.thread.proc = args->fn; p->thread.request.thread.proc = args->fn;
p->thread.request.u.thread.arg = args->fn_arg; p->thread.request.thread.arg = args->fn_arg;
handler = new_thread_handler; handler = new_thread_handler;
} }
......
...@@ -29,7 +29,7 @@ static void kill_off_processes(void) ...@@ -29,7 +29,7 @@ static void kill_off_processes(void)
t = find_lock_task_mm(p); t = find_lock_task_mm(p);
if (!t) if (!t)
continue; continue;
pid = t->mm->context.id.u.pid; pid = t->mm->context.id.pid;
task_unlock(t); task_unlock(t);
os_kill_ptraced_process(pid, 1); os_kill_ptraced_process(pid, 1);
} }
......
...@@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) ...@@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack; new_id->stack = stack;
block_signals_trace(); block_signals_trace();
new_id->u.pid = start_userspace(stack); new_id->pid = start_userspace(stack);
unblock_signals_trace(); unblock_signals_trace();
if (new_id->u.pid < 0) { if (new_id->pid < 0) {
ret = new_id->u.pid; ret = new_id->pid;
goto out_free; goto out_free;
} }
...@@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm) ...@@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm)
* whole UML suddenly dying. Also, cover negative and * whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either. * 1 cases, since they shouldn't happen either.
*/ */
if (mmu->id.u.pid < 2) { if (mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n", printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.u.pid); mmu->id.pid);
return; return;
} }
os_kill_ptraced_process(mmu->id.u.pid, 1); os_kill_ptraced_process(mmu->id.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
} }
...@@ -39,8 +39,8 @@ int __init start_uml(void) ...@@ -39,8 +39,8 @@ int __init start_uml(void)
init_new_thread_signals(); init_new_thread_signals();
init_task.thread.request.u.thread.proc = start_kernel_proc; init_task.thread.request.thread.proc = start_kernel_proc;
init_task.thread.request.u.thread.arg = NULL; init_task.thread.request.thread.arg = NULL;
return start_idle_thread(task_stack_page(&init_task), return start_idle_thread(task_stack_page(&init_task),
&init_task.thread.switch_buf); &init_task.thread.switch_buf);
} }
......
...@@ -12,23 +12,13 @@ ...@@ -12,23 +12,13 @@
#include <sysdep/syscalls.h> #include <sysdep/syscalls.h>
#include <linux/time-internal.h> #include <linux/time-internal.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/delay.h>
void handle_syscall(struct uml_pt_regs *r) void handle_syscall(struct uml_pt_regs *r)
{ {
struct pt_regs *regs = container_of(r, struct pt_regs, regs); struct pt_regs *regs = container_of(r, struct pt_regs, regs);
int syscall; int syscall;
/*
* If we have infinite CPU resources, then make every syscall also a
* preemption point, since we don't have any other preemption in this
* case, and kernel threads would basically never run until userspace
* went to sleep, even if said userspace interacts with the kernel in
* various ways.
*/
if (time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL)
schedule();
/* Initialize the syscall number and default return value. */ /* Initialize the syscall number and default return value. */
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp); UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS); PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
...@@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r) ...@@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r)
goto out; goto out;
syscall = UPT_SYSCALL_NR(r); syscall = UPT_SYSCALL_NR(r);
if (syscall >= 0 && syscall < __NR_syscalls) if (syscall >= 0 && syscall < __NR_syscalls) {
PT_REGS_SET_SYSCALL_RETURN(regs, unsigned long ret = EXECUTE_SYSCALL(syscall, regs);
EXECUTE_SYSCALL(syscall, regs));
PT_REGS_SET_SYSCALL_RETURN(regs, ret);
/*
* An error value here can be some form of -ERESTARTSYS
* and then we'd just loop. Make any error syscalls take
* some time, so that it won't just loop if something is
* not ready, and hopefully other things will make some
* progress.
*/
if (IS_ERR_VALUE(ret) &&
(time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL)) {
um_udelay(1);
schedule();
}
}
out: out:
syscall_trace_leave(regs); syscall_trace_leave(regs);
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/sched/debug.h> #include <linux/sched/debug.h>
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <asm/sysrq.h>
#include <asm/stacktrace.h> #include <asm/stacktrace.h>
#include <os.h> #include <os.h>
......
...@@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev) ...@@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev)
if (get_current()->mm != NULL) if (get_current()->mm != NULL)
{ {
/* userspace - relay signal, results in correct userspace timers */ /* userspace - relay signal, results in correct userspace timers */
os_alarm_process(get_current()->mm->context.id.u.pid); os_alarm_process(get_current()->mm->context.id.pid);
} }
(*timer_clockevent.event_handler)(&timer_clockevent); (*timer_clockevent.event_handler)(&timer_clockevent);
......
...@@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, ...@@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
(x ? UM_PROT_EXEC : 0)); (x ? UM_PROT_EXEC : 0));
if (pte_newpage(*pte)) { if (pte_newpage(*pte)) {
if (pte_present(*pte)) { if (pte_present(*pte)) {
if (pte_newpage(*pte)) { __u64 offset;
__u64 offset; unsigned long phys = pte_val(*pte) & PAGE_MASK;
unsigned long phys = int fd = phys_mapping(phys, &offset);
pte_val(*pte) & PAGE_MASK;
int fd = phys_mapping(phys, &offset); ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
prot, fd, offset);
ret = ops->mmap(ops->mm_idp, addr,
PAGE_SIZE, prot, fd,
offset);
}
} else } else
ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE); ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte)) } else if (pte_newprot(*pte))
......
...@@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w) ...@@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w)
ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
void *data, size_t data_len) void *data, size_t data_len)
{ {
char buf[CMSG_SPACE(sizeof(*fds) * n_fds)]; #define MAX_RCV_FDS 2
char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)];
struct cmsghdr *cmsg; struct cmsghdr *cmsg;
struct iovec iov = { struct iovec iov = {
.iov_base = data, .iov_base = data,
...@@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds, ...@@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
.msg_iov = &iov, .msg_iov = &iov,
.msg_iovlen = 1, .msg_iovlen = 1,
.msg_control = buf, .msg_control = buf,
.msg_controllen = sizeof(buf), .msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds),
}; };
int n; int n;
if (n_fds > MAX_RCV_FDS)
return -EINVAL;
n = recvmsg(fd, &msg, 0); n = recvmsg(fd, &msg, 0);
if (n < 0) if (n < 0)
return -errno; return -errno;
......
...@@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) ...@@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
{ {
struct stub_data *proc_data = (void *)mm_idp->stack; struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i; int n, i;
int err, pid = mm_idp->u.pid; int err, pid = mm_idp->pid;
n = ptrace_setregs(pid, syscall_regs); n = ptrace_setregs(pid, syscall_regs);
if (n < 0) { if (n < 0) {
......
...@@ -588,5 +588,5 @@ void reboot_skas(void) ...@@ -588,5 +588,5 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp) void __switch_mm(struct mm_id *mm_idp)
{ {
userspace_pid[0] = mm_idp->u.pid; userspace_pid[0] = mm_idp->pid;
} }
...@@ -9,7 +9,6 @@ ...@@ -9,7 +9,6 @@
#include <linux/sched/debug.h> #include <linux/sched/debug.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/sysrq.h>
/* This is declared by <linux/sched.h> */ /* This is declared by <linux/sched.h> */
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#include <linux/utsname.h> #include <linux/utsname.h>
#include <asm/current.h> #include <asm/current.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/sysrq.h>
void show_regs(struct pt_regs *regs) void show_regs(struct pt_regs *regs)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment