Commit 78115f09 authored by Jason Madden's avatar Jason Madden

Update to libev 4.33. Fixes #1754.

parent f7b2b6dc
Revision history for libev, a high-performance and full-featured event loop.
TODO: revisit 59.x timer in the light of modern powersaving
TODO: for next ABI/API change, consider moving EV__IOFDSSET into io->fd instead and provide a getter.
TODO: document EV_TSTAMP_T
4.33 Wed Mar 18 13:22:29 CET 2020
- no changes w.r.t. 4.32.
4.32 (EV only)
- the 4.31 timerfd code wrongly changed the priority of the signal
fd watcher, which is usually harmless unless signal fds are
also used (found via cpan tester service).
- the documentation wrongly claimed that user may modify fd and events
members in io watchers when the watcher was stopped
(found by b_jonas).
- new ev_io_modify mutator which changes only the events member,
which can be faster. also added ev::io::set (int events) method
to ev++.h.
- officially allow a zero events mask for io watchers. this should
work with older libev versions as well but was not officially
allowed before.
- do not wake up every minute when timerfd is used to detect timejumps.
- do not wake up every minute when periodics are disabled and we have
a monotonic clock.
- support a lot more "uncommon" compile time configurations,
such as ev_embed enabled but ev_timer disabled.
- use a start/stop wrapper class to reduce code duplication in
ev++.h and make it needlessly more c++-y.
- the linux aio backend is no longer compiled in by default.
- update to libecb version 0x00010008.
4.31 Fri Dec 20 21:58:29 CET 2019
- handle backends with minimum wait time a bit better by not
waiting in the presence of already-expired timers
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for libev 4.31.
# Generated by GNU Autoconf 2.69 for libev 4.33.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
......@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='libev'
PACKAGE_TARNAME='libev'
PACKAGE_VERSION='4.31'
PACKAGE_STRING='libev 4.31'
PACKAGE_VERSION='4.33'
PACKAGE_STRING='libev 4.33'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
......@@ -1325,7 +1325,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures libev 4.31 to adapt to many kinds of systems.
\`configure' configures libev 4.33 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1396,7 +1396,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of libev 4.31:";;
short | recursive ) echo "Configuration of libev 4.33:";;
esac
cat <<\_ACEOF
......@@ -1509,7 +1509,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
libev configure 4.31
libev configure 4.33
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -1928,7 +1928,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by libev $as_me 4.31, which was
It was created by libev $as_me 4.33, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
......@@ -2795,7 +2795,7 @@ fi
# Define the identity of the package.
PACKAGE='libev'
VERSION='4.31'
VERSION='4.33'
cat >>confdefs.h <<_ACEOF
......@@ -12871,7 +12871,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by libev $as_me 4.31, which was
This file was extended by libev $as_me 4.33, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -12937,7 +12937,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
libev config.status 4.31
libev config.status 4.33
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
......
/*
* libev simple C++ wrapper classes
*
* Copyright (c) 2007,2008,2010,2018 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007,2008,2010,2018,2020 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
......@@ -421,6 +421,24 @@ namespace ev {
template<class ev_watcher, class watcher>
struct base : ev_watcher
{
// scoped pause/unpause of a watcher:
// the constructor stops the watcher if it is currently active and the
// destructor starts it again, so the watcher can be reconfigured while
// the guard object is alive (the set () methods use it as a local object).
struct freeze_guard
{
watcher &w; // the watcher being paused
bool active; // whether w was active at construction time, i.e. has to be restarted
// note: active is initialised from w, which works because w is declared
// (and therefore initialised) before active.
freeze_guard (watcher *self) EV_NOEXCEPT
: w (*self), active (w.is_active ())
{
// inactive watchers are left untouched
if (active) w.stop ();
}
~freeze_guard ()
{
// restore the state that was found at construction time
if (active) w.start ();
}
};
#if EV_MULTIPLICITY
EV_PX;
......@@ -614,18 +632,14 @@ namespace ev {
EV_BEGIN_WATCHER (io, io)
void set (int fd, int events) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_io_set (static_cast<ev_io *>(this), fd, events);
if (active) start ();
}
void set (int events) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
ev_io_set (static_cast<ev_io *>(this), fd, events);
if (active) start ();
freeze_guard freeze (this);
ev_io_modify (static_cast<ev_io *>(this), events);
}
void start (int fd, int events) EV_NOEXCEPT
......@@ -638,10 +652,8 @@ namespace ev {
EV_BEGIN_WATCHER (timer, timer)
void set (ev_tstamp after, ev_tstamp repeat = 0.) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_timer_set (static_cast<ev_timer *>(this), after, repeat);
if (active) start ();
}
void start (ev_tstamp after, ev_tstamp repeat = 0.) EV_NOEXCEPT
......@@ -665,10 +677,8 @@ namespace ev {
EV_BEGIN_WATCHER (periodic, periodic)
void set (ev_tstamp at, ev_tstamp interval = 0.) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_periodic_set (static_cast<ev_periodic *>(this), at, interval, 0);
if (active) start ();
}
void start (ev_tstamp at, ev_tstamp interval = 0.) EV_NOEXCEPT
......@@ -688,10 +698,8 @@ namespace ev {
EV_BEGIN_WATCHER (sig, signal)
void set (int signum) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_signal_set (static_cast<ev_signal *>(this), signum);
if (active) start ();
}
void start (int signum) EV_NOEXCEPT
......@@ -706,10 +714,8 @@ namespace ev {
EV_BEGIN_WATCHER (child, child)
void set (int pid, int trace = 0) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_child_set (static_cast<ev_child *>(this), pid, trace);
if (active) start ();
}
void start (int pid, int trace = 0) EV_NOEXCEPT
......@@ -724,10 +730,8 @@ namespace ev {
EV_BEGIN_WATCHER (stat, stat)
void set (const char *path, ev_tstamp interval = 0.) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_stat_set (static_cast<ev_stat *>(this), path, interval);
if (active) start ();
}
void start (const char *path, ev_tstamp interval = 0.) EV_NOEXCEPT
......@@ -766,10 +770,8 @@ namespace ev {
EV_BEGIN_WATCHER (embed, embed)
void set_embed (struct ev_loop *embedded_loop) EV_NOEXCEPT
{
int active = is_active ();
if (active) stop ();
freeze_guard freeze (this);
ev_embed_set (static_cast<ev_embed *>(this), embedded_loop);
if (active) start ();
}
void start (struct ev_loop *embedded_loop) EV_NOEXCEPT
......
......@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "LIBEV 3"
.TH LIBEV 3 "2019-12-21" "libev-4.31" "libev - high performance full featured event loop"
.TH LIBEV 3 "2020-03-12" "libev-4.33" "libev - high performance full featured event loop"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
......@@ -1351,8 +1351,9 @@ with a watcher-specific start function (\f(CW\*(C`ev_TYPE_start (loop, watcher
corresponding stop function (\f(CW\*(C`ev_TYPE_stop (loop, watcher *)\*(C'\fR.
.PP
As long as your watcher is active (has been started but not stopped) you
must not touch the values stored in it. Most specifically you must never
reinitialise it or call its \f(CW\*(C`ev_TYPE_set\*(C'\fR macro.
must not touch the values stored in it except when explicitly documented
otherwise. Most specifically you must never reinitialise it or call its
\&\f(CW\*(C`ev_TYPE_set\*(C'\fR macro.
.PP
Each and every callback receives the event loop pointer as first, the
registered watcher structure as second, and a bitset of received events as
......@@ -1769,14 +1770,18 @@ This section describes each watcher in detail, but will not repeat
information given in the last section. Any initialisation/set macros,
functions and members specific to the watcher type are explained.
.PP
Members are additionally marked with either \fI[read\-only]\fR, meaning that,
while the watcher is active, you can look at the member and expect some
sensible content, but you must not modify it (you can modify it while the
watcher is stopped to your hearts content), or \fI[read\-write]\fR, which
means you can expect it to have some sensible content while the watcher
is active, but you can also modify it. Modifying it may not do something
Most members are additionally marked with either \fI[read\-only]\fR, meaning
that, while the watcher is active, you can look at the member and expect
some sensible content, but you must not modify it (you can modify it while
the watcher is stopped to your heart's content), or \fI[read\-write]\fR, which
means you can expect it to have some sensible content while the watcher is
active, but you can also modify it (within the same thread as the event
loop, i.e. without creating data races). Modifying it may not do something
sensible or take immediate effect (or do anything at all), but libev will
not crash or malfunction in any way.
.PP
In any case, the documentation for each member will explain what the
effects are, and if there are any additional access restrictions.
.ie n .SS """ev_io"" \- is this file descriptor readable or writable?"
.el .SS "\f(CWev_io\fP \- is this file descriptor readable or writable?"
.IX Subsection "ev_io - is this file descriptor readable or writable?"
......@@ -1952,14 +1957,33 @@ opportunity for a DoS attack.
.IX Item "ev_io_set (ev_io *, int fd, int events)"
.PD
Configures an \f(CW\*(C`ev_io\*(C'\fR watcher. The \f(CW\*(C`fd\*(C'\fR is the file descriptor to
receive events for and \f(CW\*(C`events\*(C'\fR is either \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR or
\&\f(CW\*(C`EV_READ | EV_WRITE\*(C'\fR, to express the desire to receive the given events.
.IP "int fd [read\-only]" 4
.IX Item "int fd [read-only]"
The file descriptor being watched.
.IP "int events [read\-only]" 4
.IX Item "int events [read-only]"
The events being watched.
receive events for and \f(CW\*(C`events\*(C'\fR is either \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR, both
\&\f(CW\*(C`EV_READ | EV_WRITE\*(C'\fR or \f(CW0\fR, to express the desire to receive the given
events.
.Sp
Note that setting the \f(CW\*(C`events\*(C'\fR to \f(CW0\fR and starting the watcher is
supported, but not specially optimized \- if your program sometimes happens
to generate this combination this is fine, but if it is easy to avoid
starting an io watcher watching for no events you should do so.
.IP "ev_io_modify (ev_io *, int events)" 4
.IX Item "ev_io_modify (ev_io *, int events)"
Similar to \f(CW\*(C`ev_io_set\*(C'\fR, but only changes the requested events. Using this
might be faster with some backends, as libev can assume that the \f(CW\*(C`fd\*(C'\fR
still refers to the same underlying file description, something it cannot
do when using \f(CW\*(C`ev_io_set\*(C'\fR.
.IP "int fd [no\-modify]" 4
.IX Item "int fd [no-modify]"
The file descriptor being watched. While it can be read at any time, you
must not modify this member even when the watcher is stopped \- always use
\&\f(CW\*(C`ev_io_set\*(C'\fR for that.
.IP "int events [no\-modify]" 4
.IX Item "int events [no-modify]"
The set of events the fd is being watched for, among other flags. Remember
that this is a bit set \- to test for \f(CW\*(C`EV_READ\*(C'\fR, use \f(CW\*(C`w\->events &
EV_READ\*(C'\fR, and similarly for \f(CW\*(C`EV_WRITE\*(C'\fR.
.Sp
As with \f(CW\*(C`fd\*(C'\fR, you must not modify this member even when the watcher is
stopped, always use \f(CW\*(C`ev_io_set\*(C'\fR or \f(CW\*(C`ev_io_modify\*(C'\fR for that.
.PP
\fIExamples\fR
.IX Subsection "Examples"
......@@ -4397,6 +4421,9 @@ method.
.Sp
For \f(CW\*(C`ev::embed\*(C'\fR watchers this method is called \f(CW\*(C`set_embed\*(C'\fR, to avoid
clashing with the \f(CW\*(C`set (loop)\*(C'\fR method.
.Sp
For \f(CW\*(C`ev::io\*(C'\fR watchers there is an additional \f(CW\*(C`set\*(C'\fR method that accepts a
new event mask only, and internally calls \f(CW\*(C`ev_io_modify\*(C'\fR.
.IP "w\->start ()" 4
.IX Item "w->start ()"
Starts the watcher. Note that there is no \f(CW\*(C`loop\*(C'\fR argument, as the
......
......@@ -119,7 +119,7 @@
# if HAVE_LINUX_AIO_ABI_H
# ifndef EV_USE_LINUXAIO
# define EV_USE_LINUXAIO EV_FEATURE_BACKENDS
# define EV_USE_LINUXAIO 0 /* was: EV_FEATURE_BACKENDS, always off by default */
# endif
# else
# undef EV_USE_LINUXAIO
......@@ -346,7 +346,7 @@
#ifndef EV_USE_LINUXAIO
# if __linux /* libev currently assumes linux/aio_abi.h is always available on linux */
# define EV_USE_LINUXAIO 1
# define EV_USE_LINUXAIO 0 /* was: 1, always off by default */
# else
# define EV_USE_LINUXAIO 0
# endif
......@@ -581,6 +581,7 @@ struct signalfd_siginfo
#define MIN_TIMEJUMP 1. /* minimum timejump that gets detected (if monotonic clock available) */
#define MAX_BLOCKTIME 59.743 /* never wait longer than this time (to detect time jumps) */
#define MAX_BLOCKTIME2 1500001.07 /* same, but when timerfd is used to detect jumps, also safe delay to not overflow */
/* find a portable timestamp that is "always" in the future but fits into time_t.
* this is quite hard, and we are mostly guessing - we handle 32 bit signed/unsigned time_t,
......@@ -605,7 +606,7 @@ struct signalfd_siginfo
/*
* libecb - http://software.schmorp.de/pkg/libecb
*
* Copyright (©) 2009-2015 Marc Alexander Lehmann <libecb@schmorp.de>
* Copyright (©) 2009-2015,2018-2020 Marc Alexander Lehmann <libecb@schmorp.de>
* Copyright (©) 2011 Emanuele Giaquinta
* All rights reserved.
*
......@@ -646,15 +647,23 @@ struct signalfd_siginfo
#define ECB_H
/* 16 bits major, 16 bits minor */
#define ECB_VERSION 0x00010006
#define ECB_VERSION 0x00010008
#ifdef _WIN32
#include <string.h> /* for memcpy */
#if defined (_WIN32) && !defined (__MINGW32__)
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed char int_fast8_t;
typedef unsigned char uint_fast8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int_fast16_t;
typedef unsigned int uint_fast16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed int int_fast32_t;
typedef unsigned int uint_fast32_t;
#if __GNUC__
typedef signed long long int64_t;
typedef unsigned long long uint64_t;
......@@ -662,6 +671,8 @@ struct signalfd_siginfo
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#endif
typedef int64_t int_fast64_t;
typedef uint64_t uint_fast64_t;
#ifdef _WIN64
#define ECB_PTRSIZE 8
typedef uint64_t uintptr_t;
......@@ -683,6 +694,14 @@ struct signalfd_siginfo
#define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
#define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)
/* ECB_OPTIMIZE_SIZE: defaults to 1 when __OPTIMIZE_SIZE__ is set to a */
/* non-zero value and to 0 otherwise. a definition made before this point */
/* is kept as-is, so the default can be overridden. */
#ifndef ECB_OPTIMIZE_SIZE
#if __OPTIMIZE_SIZE__
#define ECB_OPTIMIZE_SIZE 1
#else
#define ECB_OPTIMIZE_SIZE 0
#endif
#endif
/* work around x32 idiocy by defining proper macros */
#if ECB_GCC_AMD64 || ECB_MSVC_AMD64
#if _ILP32
......@@ -1198,6 +1217,44 @@ ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { retu
ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); }
ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); }
/* C++ only: overloaded, width-generic front ends for the fixed-width */
/* ecb_*8/16/32/64 functions, selected by the argument type. */
#if ECB_CPP
/* ecb_ctz: 8, 16 and 32 bit arguments are all forwarded to ecb_ctz32, */
/* 64 bit arguments to ecb_ctz64 */
inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); }
inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); }
inline uint32_t ecb_ctz (uint32_t v) { return ecb_ctz32 (v); }
inline uint64_t ecb_ctz (uint64_t v) { return ecb_ctz64 (v); }
/* ecb_is_pot: forwards to ecb_is_pot32 or, for 64 bit arguments, ecb_is_pot64 */
inline bool ecb_is_pot (uint8_t v) { return ecb_is_pot32 (v); }
inline bool ecb_is_pot (uint16_t v) { return ecb_is_pot32 (v); }
inline bool ecb_is_pot (uint32_t v) { return ecb_is_pot32 (v); }
inline bool ecb_is_pot (uint64_t v) { return ecb_is_pot64 (v); }
/* ecb_ld: forwards to ecb_ld32 or, for 64 bit arguments, ecb_ld64 */
inline int ecb_ld (uint8_t v) { return ecb_ld32 (v); }
inline int ecb_ld (uint16_t v) { return ecb_ld32 (v); }
inline int ecb_ld (uint32_t v) { return ecb_ld32 (v); }
inline int ecb_ld (uint64_t v) { return ecb_ld64 (v); }
/* ecb_popcount: forwards to ecb_popcount32 or, for 64 bit arguments, ecb_popcount64 */
inline int ecb_popcount (uint8_t v) { return ecb_popcount32 (v); }
inline int ecb_popcount (uint16_t v) { return ecb_popcount32 (v); }
inline int ecb_popcount (uint32_t v) { return ecb_popcount32 (v); }
inline int ecb_popcount (uint64_t v) { return ecb_popcount64 (v); }
/* ecb_bitrev: uses the function matching the argument width exactly; */
/* no 64 bit overload is declared in this group */
inline uint8_t ecb_bitrev (uint8_t v) { return ecb_bitrev8 (v); }
inline uint16_t ecb_bitrev (uint16_t v) { return ecb_bitrev16 (v); }
inline uint32_t ecb_bitrev (uint32_t v) { return ecb_bitrev32 (v); }
/* ecb_rotl: rotate left by count, using the function matching the argument width */
inline uint8_t ecb_rotl (uint8_t v, unsigned int count) { return ecb_rotl8 (v, count); }
inline uint16_t ecb_rotl (uint16_t v, unsigned int count) { return ecb_rotl16 (v, count); }
inline uint32_t ecb_rotl (uint32_t v, unsigned int count) { return ecb_rotl32 (v, count); }
inline uint64_t ecb_rotl (uint64_t v, unsigned int count) { return ecb_rotl64 (v, count); }
/* ecb_rotr: rotate right by count, using the function matching the argument width */
inline uint8_t ecb_rotr (uint8_t v, unsigned int count) { return ecb_rotr8 (v, count); }
inline uint16_t ecb_rotr (uint16_t v, unsigned int count) { return ecb_rotr16 (v, count); }
inline uint32_t ecb_rotr (uint32_t v, unsigned int count) { return ecb_rotr32 (v, count); }
inline uint64_t ecb_rotr (uint64_t v, unsigned int count) { return ecb_rotr64 (v, count); }
#endif
#if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64))
#if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16)
#define ecb_bswap16(x) __builtin_bswap16 (x)
......@@ -1278,6 +1335,78 @@ ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_he
ecb_inline ecb_const ecb_bool ecb_little_endian (void);
ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
/*****************************************************************************/
/* unaligned load/store */
/* naming used below: "be"/"le" is the byte order of the external data, */
/* "host" is the byte order of the running machine as reported by */
/* ecb_little_endian ()/ecb_big_endian (), and a trailing "_u" marks accessors */
/* that go through memcpy instead of dereferencing a typed pointer. */
/* big-endian value -> host order: byte-swapped on little-endian hosts only */
ecb_inline uint_fast16_t ecb_be_u16_to_host (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
ecb_inline uint_fast32_t ecb_be_u32_to_host (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
ecb_inline uint_fast64_t ecb_be_u64_to_host (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
/* little-endian value -> host order: byte-swapped on big-endian hosts only */
ecb_inline uint_fast16_t ecb_le_u16_to_host (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
ecb_inline uint_fast32_t ecb_le_u32_to_host (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
ecb_inline uint_fast64_t ecb_le_u64_to_host (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
/* load a 16/32/64 bit value in host byte order by copying sizeof (v) bytes from ptr */
ecb_inline uint_fast16_t ecb_peek_u16_u (const void *ptr) { uint16_t v; memcpy (&v, ptr, sizeof (v)); return v; }
ecb_inline uint_fast32_t ecb_peek_u32_u (const void *ptr) { uint32_t v; memcpy (&v, ptr, sizeof (v)); return v; }
ecb_inline uint_fast64_t ecb_peek_u64_u (const void *ptr) { uint64_t v; memcpy (&v, ptr, sizeof (v)); return v; }
/* load a big-endian value and convert it to host order */
ecb_inline uint_fast16_t ecb_peek_be_u16_u (const void *ptr) { return ecb_be_u16_to_host (ecb_peek_u16_u (ptr)); }
ecb_inline uint_fast32_t ecb_peek_be_u32_u (const void *ptr) { return ecb_be_u32_to_host (ecb_peek_u32_u (ptr)); }
ecb_inline uint_fast64_t ecb_peek_be_u64_u (const void *ptr) { return ecb_be_u64_to_host (ecb_peek_u64_u (ptr)); }
/* load a little-endian value and convert it to host order */
ecb_inline uint_fast16_t ecb_peek_le_u16_u (const void *ptr) { return ecb_le_u16_to_host (ecb_peek_u16_u (ptr)); }
ecb_inline uint_fast32_t ecb_peek_le_u32_u (const void *ptr) { return ecb_le_u32_to_host (ecb_peek_u32_u (ptr)); }
ecb_inline uint_fast64_t ecb_peek_le_u64_u (const void *ptr) { return ecb_le_u64_to_host (ecb_peek_u64_u (ptr)); }
/* host order -> big-endian: byte-swapped on little-endian hosts only */
ecb_inline uint_fast16_t ecb_host_to_be_u16 (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
ecb_inline uint_fast32_t ecb_host_to_be_u32 (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
ecb_inline uint_fast64_t ecb_host_to_be_u64 (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
/* host order -> little-endian: byte-swapped on big-endian hosts only */
ecb_inline uint_fast16_t ecb_host_to_le_u16 (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
ecb_inline uint_fast32_t ecb_host_to_le_u32 (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
ecb_inline uint_fast64_t ecb_host_to_le_u64 (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
/* store a 16/32/64 bit value in host byte order by copying sizeof (v) bytes to ptr */
ecb_inline void ecb_poke_u16_u (void *ptr, uint16_t v) { memcpy (ptr, &v, sizeof (v)); }
ecb_inline void ecb_poke_u32_u (void *ptr, uint32_t v) { memcpy (ptr, &v, sizeof (v)); }
ecb_inline void ecb_poke_u64_u (void *ptr, uint64_t v) { memcpy (ptr, &v, sizeof (v)); }
/* convert a host order value to big-endian and store it */
ecb_inline void ecb_poke_be_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_be_u16 (v)); }
ecb_inline void ecb_poke_be_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_be_u32 (v)); }
ecb_inline void ecb_poke_be_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_be_u64 (v)); }
/* convert a host order value to little-endian and store it */
ecb_inline void ecb_poke_le_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_le_u16 (v)); }
ecb_inline void ecb_poke_le_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_le_u32 (v)); }
ecb_inline void ecb_poke_le_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_le_u64 (v)); }
/* C++ only: type-generic versions of the helpers above */
#if ECB_CPP
/* ecb_bswap: overloaded byte swap; the 8 bit version returns its argument unchanged */
inline uint8_t ecb_bswap (uint8_t v) { return v; }
inline uint16_t ecb_bswap (uint16_t v) { return ecb_bswap16 (v); }
inline uint32_t ecb_bswap (uint32_t v) { return ecb_bswap32 (v); }
inline uint64_t ecb_bswap (uint64_t v) { return ecb_bswap64 (v); }
/* external byte order -> host order, swapping only when the two differ */
template<typename T> inline T ecb_be_to_host (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
template<typename T> inline T ecb_le_to_host (T v) { return ecb_big_endian () ? ecb_bswap (v) : v; }
/* ecb_peek reads through a pointer cast to const T * (the caller presumably */
/* has to guarantee suitable alignment - the memcpy variants are the _u ones) */
template<typename T> inline T ecb_peek (const void *ptr) { return *(const T *)ptr; }
template<typename T> inline T ecb_peek_be (const void *ptr) { return ecb_be_to_host (ecb_peek <T> (ptr)); }
template<typename T> inline T ecb_peek_le (const void *ptr) { return ecb_le_to_host (ecb_peek <T> (ptr)); }
/* ecb_peek_u copies sizeof (T) bytes with memcpy instead of dereferencing ptr */
template<typename T> inline T ecb_peek_u (const void *ptr) { T v; memcpy (&v, ptr, sizeof (v)); return v; }
template<typename T> inline T ecb_peek_be_u (const void *ptr) { return ecb_be_to_host (ecb_peek_u<T> (ptr)); }
template<typename T> inline T ecb_peek_le_u (const void *ptr) { return ecb_le_to_host (ecb_peek_u<T> (ptr)); }
/* host order -> external byte order, swapping only when the two differ */
template<typename T> inline T ecb_host_to_be (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
template<typename T> inline T ecb_host_to_le (T v) { return ecb_big_endian () ? ecb_bswap (v) : v; }
/* ecb_poke writes through a pointer cast to T * (same alignment caveat as ecb_peek) */
template<typename T> inline void ecb_poke (void *ptr, T v) { *(T *)ptr = v; }
template<typename T> inline void ecb_poke_be (void *ptr, T v) { return ecb_poke <T> (ptr, ecb_host_to_be (v)); }
template<typename T> inline void ecb_poke_le (void *ptr, T v) { return ecb_poke <T> (ptr, ecb_host_to_le (v)); }
/* ecb_poke_u copies sizeof (T) bytes with memcpy instead of writing through a T * */
template<typename T> inline void ecb_poke_u (void *ptr, T v) { memcpy (ptr, &v, sizeof (v)); }
template<typename T> inline void ecb_poke_be_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_be (v)); }
template<typename T> inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_le (v)); }
#endif
/*****************************************************************************/
#if ECB_GCC_VERSION(3,0) || ECB_C99
#define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
#else
......@@ -1311,6 +1440,8 @@ ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_he
#define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
#endif
/*****************************************************************************/
ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
ecb_function_ ecb_const uint32_t
ecb_binary16_to_binary32 (uint32_t x)
......@@ -1428,7 +1559,6 @@ ecb_binary32_to_binary16 (uint32_t x)
|| (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \
|| defined __aarch64__
#define ECB_STDFP 1
#include <string.h> /* for memcpy */
#else
#define ECB_STDFP 0
#endif
......@@ -1660,7 +1790,7 @@ ecb_binary32_to_binary16 (uint32_t x)
* TODO: arm is also common nowadays, maybe even mips and x86
* TODO: after implementing this, it suddenly looks like overkill, but its hard to remove...
*/
#if __GNUC__ && __linux && ECB_AMD64 && !defined __OPTIMIZE_SIZE__
#if __GNUC__ && __linux && ECB_AMD64 && !EV_FEATURE_CODE
/* the costly errno access probably kills this for size optimisation */
#define ev_syscall(nr,narg,arg1,arg2,arg3,arg4,arg5,arg6) \
......@@ -2264,8 +2394,20 @@ fd_reify (EV_P)
{
int i;
/* most backends do not modify the fdchanges list in backend_modify.
* except io_uring, which has fixed-size buffers which might force us
* to handle events in backend_modify, causing fdchanges to be amended,
* which could result in an endless loop.
* to avoid this, we do not dynamically handle fds that were added
* during fd_reify. that means that for those backends, fdchangecnt
* might be non-zero during poll, which must cause them to not block.
* to not put too much of a burden on other backends, this detail
* needs to be handled in the backend.
*/
int changecnt = fdchangecnt;
#if EV_SELECT_IS_WINSOCKET || EV_USE_IOCP
for (i = 0; i < fdchangecnt; ++i)
for (i = 0; i < changecnt; ++i)
{
int fd = fdchanges [i];
ANFD *anfd = anfds + fd;
......@@ -2289,7 +2431,7 @@ fd_reify (EV_P)
}
#endif
for (i = 0; i < fdchangecnt; ++i)
for (i = 0; i < changecnt; ++i)
{
int fd = fdchanges [i];
ANFD *anfd = anfds + fd;
......@@ -2315,7 +2457,14 @@ fd_reify (EV_P)
backend_modify (EV_A_ fd, o_events, anfd->events);
}
fdchangecnt = 0;
/* normally, fdchangecnt hasn't changed. if it has, then new fds have been added.
* this is a rare case (see beginning comment in this function), so we copy them to the
* front and hope the backend handles this case.
*/
if (ecb_expect_false (fdchangecnt != changecnt))
memmove (fdchanges, fdchanges + changecnt, (fdchangecnt - changecnt) * sizeof (*fdchanges));
fdchangecnt -= changecnt;
}
/* something about the given fd changed */
......@@ -2324,7 +2473,7 @@ void
fd_change (EV_P_ int fd, int flags)
{
unsigned char reify = anfds [fd].reify;
anfds [fd].reify |= flags;
anfds [fd].reify = reify | flags;
if (ecb_expect_true (!reify))
{
......@@ -2563,7 +2712,7 @@ reheap (ANHE *heap, int N)
/*****************************************************************************/
/* associate signal watchers to a signal signal */
/* associate signal watchers to a signal */
typedef struct
{
EV_ATOMIC_T pending;
......@@ -2886,10 +3035,7 @@ timerfdcb (EV_P_ ev_io *iow, int revents)
{
struct itimerspec its = { 0 };
/* since we can't easily come zup with a (portable) maximum value of time_t,
* we wake up once per month, which hopefully is rare enough to not
* be a problem. */
its.it_value.tv_sec = ev_rt_now + 86400 * 30;
its.it_value.tv_sec = ev_rt_now + (int)MAX_BLOCKTIME2;
timerfd_settime (timerfd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET, &its, 0);
ev_rt_now = ev_time ();
......@@ -2899,7 +3045,9 @@ timerfdcb (EV_P_ ev_io *iow, int revents)
now_floor = EV_TS_CONST (0.);
time_update (EV_A_ EV_TSTAMP_HUGE);
*/
#if EV_PERIODIC_ENABLE
periodics_reschedule (EV_A);
#endif
}
ecb_noinline ecb_cold
......@@ -2915,7 +3063,7 @@ evtimerfd_init (EV_P)
fd_intern (timerfd); /* just to be sure */
ev_io_init (&timerfd_w, timerfdcb, timerfd, EV_READ);
ev_set_priority (&sigfd_w, EV_MINPRI);
ev_set_priority (&timerfd_w, EV_MINPRI);
ev_io_start (EV_A_ &timerfd_w);
ev_unref (EV_A); /* watcher should not keep loop alive */
......@@ -2987,8 +3135,8 @@ ev_supported_backends (void) EV_NOEXCEPT
if (EV_USE_PORT ) flags |= EVBACKEND_PORT;
if (EV_USE_KQUEUE ) flags |= EVBACKEND_KQUEUE;
if (EV_USE_EPOLL ) flags |= EVBACKEND_EPOLL;
if (EV_USE_LINUXAIO) flags |= EVBACKEND_LINUXAIO;
if (EV_USE_IOURING ) flags |= EVBACKEND_IOURING;
if (EV_USE_LINUXAIO ) flags |= EVBACKEND_LINUXAIO;
if (EV_USE_IOURING && ev_linux_version () >= 0x050601) flags |= EVBACKEND_IOURING; /* 5.6.1+ */
if (EV_USE_POLL ) flags |= EVBACKEND_POLL;
if (EV_USE_SELECT ) flags |= EVBACKEND_SELECT;
......@@ -3031,7 +3179,7 @@ ecb_cold
unsigned int
ev_embeddable_backends (void) EV_NOEXCEPT
{
int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT;
int flags = EVBACKEND_EPOLL | EVBACKEND_KQUEUE | EVBACKEND_PORT | EVBACKEND_IOURING;
/* epoll embeddability broken on all linux versions up to at least 2.6.23 */
if (ev_linux_version () < 0x020620) /* disable it on linux < 2.6.32 */
......@@ -3039,11 +3187,6 @@ ev_embeddable_backends (void) EV_NOEXCEPT
/* EVBACKEND_LINUXAIO is theoretically embeddable, but suffers from a performance overhead */
/* EVBACKEND_IOURING is practically embeddable, but the current implementation is not
* because our backend_fd is the epoll fd we need as fallback.
* if the kernel ever is fixed, this might change...
*/
return flags;
}
......@@ -3951,6 +4094,18 @@ ev_run (EV_P_ int flags)
{
waittime = EV_TS_CONST (MAX_BLOCKTIME);
#if EV_USE_TIMERFD
/* sleep a lot longer when we can reliably detect timejumps */
if (ecb_expect_true (timerfd >= 0))
waittime = EV_TS_CONST (MAX_BLOCKTIME2);
#endif
#if !EV_PERIODIC_ENABLE
/* without periodics but with monotonic clock there is no need */
/* for any time jump detection, so sleep longer */
if (ecb_expect_true (have_monotonic))
waittime = EV_TS_CONST (MAX_BLOCKTIME2);
#endif
if (timercnt)
{
ev_tstamp to = ANHE_at (timers [HEAP0]) - mn_now;
......@@ -5078,6 +5233,7 @@ embed_prepare_cb (EV_P_ ev_prepare *prepare, int revents)
}
}
#if EV_FORK_ENABLE
static void
embed_fork_cb (EV_P_ ev_fork *fork_w, int revents)
{
......@@ -5094,6 +5250,7 @@ embed_fork_cb (EV_P_ ev_fork *fork_w, int revents)
ev_embed_start (EV_A_ w);
}
#endif
#if 0
static void
......@@ -5124,8 +5281,10 @@ ev_embed_start (EV_P_ ev_embed *w) EV_NOEXCEPT
ev_set_priority (&w->prepare, EV_MINPRI);
ev_prepare_start (EV_A_ &w->prepare);
#if EV_FORK_ENABLE
ev_fork_init (&w->fork, embed_fork_cb);
ev_fork_start (EV_A_ &w->fork);
#endif
/*ev_idle_init (&w->idle, embed_idle_cb);*/
......@@ -5145,7 +5304,9 @@ ev_embed_stop (EV_P_ ev_embed *w) EV_NOEXCEPT
ev_io_stop (EV_A_ &w->io);
ev_prepare_stop (EV_A_ &w->prepare);
#if EV_FORK_ENABLE
ev_fork_stop (EV_A_ &w->fork);
#endif
ev_stop (EV_A_ (W)w);
......
/*
* libev native API header
*
* Copyright (c) 2007-2019 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2007-2020 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
......@@ -215,7 +215,7 @@ struct ev_loop;
/*****************************************************************************/
#define EV_VERSION_MAJOR 4
#define EV_VERSION_MINOR 31
#define EV_VERSION_MINOR 33
/* eventmask, revents, events... */
enum {
......@@ -392,14 +392,12 @@ typedef struct ev_stat
} ev_stat;
#endif
#if EV_IDLE_ENABLE
/* invoked when nothing else needs to be done, keeps the process from blocking */
/* revent EV_IDLE */
typedef struct ev_idle
{
EV_WATCHER (ev_idle)
} ev_idle;
#endif
/* invoked for each run of the mainloop, just before the blocking call */
/* you can still change events in any way you like */
......@@ -416,23 +414,19 @@ typedef struct ev_check
EV_WATCHER (ev_check)
} ev_check;
#if EV_FORK_ENABLE
/* the callback gets invoked before check in the child process when a fork was detected */
/* revent EV_FORK */
typedef struct ev_fork
{
EV_WATCHER (ev_fork)
} ev_fork;
#endif
#if EV_CLEANUP_ENABLE
/* is invoked just before the loop gets destroyed */
/* revent EV_CLEANUP */
typedef struct ev_cleanup
{
EV_WATCHER (ev_cleanup)
} ev_cleanup;
#endif
#if EV_EMBED_ENABLE
/* used to embed an event loop inside another */
......@@ -442,16 +436,18 @@ typedef struct ev_embed
EV_WATCHER (ev_embed)
struct ev_loop *other; /* ro */
#undef EV_IO_ENABLE
#define EV_IO_ENABLE 1
ev_io io; /* private */
#undef EV_PREPARE_ENABLE
#define EV_PREPARE_ENABLE 1
ev_prepare prepare; /* private */
ev_check check; /* unused */
ev_timer timer; /* unused */
ev_periodic periodic; /* unused */
ev_idle idle; /* unused */
ev_fork fork; /* private */
#if EV_CLEANUP_ENABLE
ev_cleanup cleanup; /* unused */
#endif
} ev_embed;
#endif
......@@ -526,7 +522,7 @@ enum {
EVBACKEND_KQUEUE = 0x00000008U, /* bsd, broken on osx */
EVBACKEND_DEVPOLL = 0x00000010U, /* solaris 8 */ /* NYI */
EVBACKEND_PORT = 0x00000020U, /* solaris 10 */
EVBACKEND_LINUXAIO = 0x00000040U, /* linuix AIO, 4.19+ */
EVBACKEND_LINUXAIO = 0x00000040U, /* linux AIO, 4.19+ */
EVBACKEND_IOURING = 0x00000080U, /* linux io_uring, 5.1+ */
EVBACKEND_ALL = 0x000000FFU, /* all known backends */
EVBACKEND_MASK = 0x0000FFFFU /* all future backends */
......@@ -660,6 +656,8 @@ EV_API_DECL void ev_unref (EV_P) EV_NOEXCEPT;
*/
EV_API_DECL void ev_once (EV_P_ int fd, int events, ev_tstamp timeout, void (*cb)(int revents, void *arg), void *arg) EV_NOEXCEPT;
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */
# if EV_FEATURE_API
EV_API_DECL unsigned int ev_iteration (EV_P) EV_NOEXCEPT; /* number of loop iterations */
EV_API_DECL unsigned int ev_depth (EV_P) EV_NOEXCEPT; /* #ev_loop enters - #ev_loop leaves */
......@@ -677,7 +675,6 @@ EV_API_DECL void ev_set_invoke_pending_cb (EV_P_ ev_loop_callback invoke_pending
EV_API_DECL void ev_set_loop_release_cb (EV_P_ void (*release)(EV_P) EV_NOEXCEPT, void (*acquire)(EV_P) EV_NOEXCEPT) EV_NOEXCEPT;
EV_API_DECL unsigned int ev_pending_count (EV_P) EV_NOEXCEPT; /* number of pending events, if any */
EV_API_DECL void ev_invoke_pending (EV_P); /* invoke all pending watchers */
/*
* stop/start the timer handling.
......@@ -697,6 +694,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT;
ev_set_cb ((ev), cb_); \
} while (0)
#define ev_io_modify(ev,events_) do { (ev)->events = (ev)->events & EV__IOFDSET | (events_); } while (0)
#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0)
#define ev_timer_set(ev,after_,repeat_) do { ((ev_watcher_time *)(ev))->at = (after_); (ev)->repeat = (repeat_); } while (0)
#define ev_periodic_set(ev,ofs_,ival_,rcb_) do { (ev)->offset = (ofs_); (ev)->interval = (ival_); (ev)->reschedule_cb = (rcb_); } while (0)
......@@ -742,6 +740,7 @@ EV_API_DECL void ev_resume (EV_P) EV_NOEXCEPT;
#define ev_periodic_at(ev) (+((ev_watcher_time *)(ev))->at)
#ifndef ev_set_cb
/* memmove is used here to avoid strict aliasing violations, and hopefully is optimized out by any reasonable compiler */
# define ev_set_cb(ev,cb_) (ev_cb_ (ev) = (cb_), memmove (&((ev_watcher *)(ev))->cb, &ev_cb_ (ev), sizeof (ev_cb_ (ev))))
#endif
......
......@@ -1220,8 +1220,9 @@ with a watcher-specific start function (C<< ev_TYPE_start (loop, watcher
corresponding stop function (C<< ev_TYPE_stop (loop, watcher *) >>).
As long as your watcher is active (has been started but not stopped) you
must not touch the values stored in it. Most specifically you must never
reinitialise it or call its C<ev_TYPE_set> macro.
must not touch the values stored in it except when explicitly documented
otherwise. Most specifically you must never reinitialise it or call its
C<ev_TYPE_set> macro.
Each and every callback receives the event loop pointer as first, the
registered watcher structure as second, and a bitset of received events as
......@@ -1650,15 +1651,18 @@ This section describes each watcher in detail, but will not repeat
information given in the last section. Any initialisation/set macros,
functions and members specific to the watcher type are explained.
Members are additionally marked with either I<[read-only]>, meaning that,
while the watcher is active, you can look at the member and expect some
sensible content, but you must not modify it (you can modify it while the
watcher is stopped to your hearts content), or I<[read-write]>, which
means you can expect it to have some sensible content while the watcher
is active, but you can also modify it. Modifying it may not do something
Most members are additionally marked with either I<[read-only]>, meaning
that, while the watcher is active, you can look at the member and expect
some sensible content, but you must not modify it (you can modify it while
the watcher is stopped to your heart's content), or I<[read-write]>, which
means you can expect it to have some sensible content while the watcher is
active, but you can also modify it (within the same thread as the event
loop, i.e. without creating data races). Modifying it may not do something
sensible or take immediate effect (or do anything at all), but libev will
not crash or malfunction in any way.
In any case, the documentation for each member will explain what the
effects are, and if there are any additional access restrictions.
=head2 C<ev_io> - is this file descriptor readable or writable?
......@@ -1828,16 +1832,36 @@ opportunity for a DoS attack.
=item ev_io_set (ev_io *, int fd, int events)
Configures an C<ev_io> watcher. The C<fd> is the file descriptor to
receive events for and C<events> is either C<EV_READ>, C<EV_WRITE> or
C<EV_READ | EV_WRITE>, to express the desire to receive the given events.
receive events for and C<events> is either C<EV_READ>, C<EV_WRITE>, both
C<EV_READ | EV_WRITE> or C<0>, to express the desire to receive the given
events.
=item int fd [read-only]
Note that setting the C<events> to C<0> and starting the watcher is
supported, but not specially optimized - if your program sometimes happens
to generate this combination this is fine, but if it is easy to avoid
starting an io watcher watching for no events you should do so.
The file descriptor being watched.
=item ev_io_modify (ev_io *, int events)
=item int events [read-only]
Similar to C<ev_io_set>, but only changes the requested events. Using this
might be faster with some backends, as libev can assume that the C<fd>
still refers to the same underlying file description, something it cannot
do when using C<ev_io_set>.
The events being watched.
=item int fd [no-modify]
The file descriptor being watched. While it can be read at any time, you
must not modify this member even when the watcher is stopped - always use
C<ev_io_set> for that.
=item int events [no-modify]
The set of events the fd is being watched for, among other flags. Remember
that this is a bit set - to test for C<EV_READ>, use C<< w->events &
EV_READ >>, and similarly for C<EV_WRITE>.
As with C<fd>, you must not modify this member even when the watcher is
stopped, always use C<ev_io_set> or C<ev_io_modify> for that.
=back
......@@ -4245,6 +4269,9 @@ method.
For C<ev::embed> watchers this method is called C<set_embed>, to avoid
clashing with the C<set (loop)> method.
For C<ev::io> watchers there is an additional C<set> method that accepts a
new event mask only, and internally calls C<ev_io_modify>.
=item w->start ()
Starts the watcher. Note that there is no C<loop> argument, as the
......
/*
* libev linux io_uring fd activity backend
*
* Copyright (c) 2019 Marc Alexander Lehmann <libev@schmorp.de>
* Copyright (c) 2019-2020 Marc Alexander Lehmann <libev@schmorp.de>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modifica-
......@@ -46,11 +46,11 @@
* of linux aio or epoll and so on and so on. and you could do event stuff
* without any syscalls. what's not to like?
* d) ok, it's vastly more complex, but that's ok, really.
* e) why 3 mmaps instead of one? one would be more space-efficient,
* and I can't see what benefit three would have (other than being
* e) why two mmaps instead of one? one would be more space-efficient,
* and I can't see what benefit two would have (other than being
* somehow resizable/relocatable, but that's apparently not possible).
* f) hmm, it's practiclaly undebuggable (gdb can't access the memory, and
the bizarre way structure offsets are commuinicated makes it hard to
* f) hmm, it's practically undebuggable (gdb can't access the memory, and
* the bizarre way structure offsets are communicated makes it hard to
* just print the ring buffer heads, even *iff* the memory were visible
* in gdb. but then, that's also ok, really.
* g) well, you cannot specify a timeout when waiting for events. no,
......@@ -60,27 +60,32 @@
* like a µ-optimisation by the io_uring author for his personal
* applications, to the detriment of everybody else who just wants
* an event loop. but, umm, ok, if that's all, it could be worse.
* h) there is a hardcoded limit of 4096 outstanding events. okay,
* at least there is no arbitrary low system-wide limit...
* (from what I gather from the author Jens Axboe, it simply didn't
* occur to him, and he made good on it by adding an unlimited nuber
* of timeouts later :).
* h) initially there was a hardcoded limit of 4096 outstanding events.
* later versions not only bump this to 32k, but also can handle
* an unlimited amount of events, so this only affects the batch size.
* i) unlike linux aio, you *can* register more than the limit
* of fd events, and the kernel will "gracefully" signal an
* overflow, after which you could destroy and recreate the kernel
* state, a bit bigger, or fall back to e.g. poll. thats not
* totally insane, but kind of questions the point a high
* performance I/O framework when it doesn't really work
* under stress.
* j) but, oh my! is has exactly the same bugs as the linux aio backend,
* where some undocumented poll combinations just fail.
* so we need epoll AGAIN as a fallback. AGAIN! epoll!! and of course,
* this is completely undocumented, have I mantioned this already?
* of fd events. while early versions of io_uring signalled an overflow
* and you ended up getting wet, 5.5+ does not do this anymore.
* j) but, oh my! it had exactly the same bugs as the linux aio backend,
* where some undocumented poll combinations just fail. fortunately,
* after finally reaching the author, he was more than willing to fix
* this probably in 5.6+.
* k) overall, the *API* itself is, I dare to say, not a total trainwreck.
* the big isuess with it are the bugs requiring epoll, which might
* or might not get fixed (do I hold my breath?).
* once the bugs are fixed (probably in 5.6+), it will be without
* competition.
*/
/* TODO: use internal TIMEOUT */
/* TODO: take advantage of single mmap, NODROP etc. */
/* TODO: resize cq/sq size independently */
#include <sys/timerfd.h>
#include <sys/mman.h>
#include <poll.h>
#include <stdint.h>
#define IOURING_INIT_ENTRIES 32
......@@ -98,7 +103,10 @@ struct io_uring_sqe
__u8 flags;
__u16 ioprio;
__s32 fd;
union {
__u64 off;
__u64 addr2;
};
__u64 addr;
__u32 len;
union {
......@@ -107,6 +115,11 @@ struct io_uring_sqe
__u16 poll_events;
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
__u32 accept_flags;
__u32 cancel_flags;
__u32 open_flags;
__u32 statx_flags;
};
__u64 user_data;
union {
......@@ -153,13 +166,27 @@ struct io_uring_params
__u32 flags;
__u32 sq_thread_cpu;
__u32 sq_thread_idle;
__u32 resv[5];
__u32 features;
__u32 resv[4];
struct io_sqring_offsets sq_off;
struct io_cqring_offsets cq_off;
};
#define IORING_SETUP_CQSIZE 0x00000008
#define IORING_OP_POLL_ADD 6
#define IORING_OP_POLL_REMOVE 7
#define IORING_OP_TIMEOUT 11
#define IORING_OP_TIMEOUT_REMOVE 12
/* seconds/nanoseconds pair, presumably the time argument of IORING_OP_TIMEOUT requests - TODO confirm at the use site */
/* relative or absolute, reference clock is CLOCK_MONOTONIC */
/* NOTE(review): the field widths presumably have to match the kernel's own timespec layout - verify against the kernel headers */
struct iouring_kernel_timespec
{
int64_t tv_sec; /* whole seconds */
long long tv_nsec; /* nanoseconds in addition to tv_sec */
};
#define IORING_TIMEOUT_ABS 0x00000001
#define IORING_ENTER_GETEVENTS 0x01
......@@ -167,6 +194,10 @@ struct io_uring_params
#define IORING_OFF_CQ_RING 0x08000000ULL
#define IORING_OFF_SQES 0x10000000ULL
#define IORING_FEAT_SINGLE_MMAP 0x00000001
#define IORING_FEAT_NODROP 0x00000002
#define IORING_FEAT_SUBMIT_STABLE 0x00000004
inline_size
int
evsys_io_uring_setup (unsigned entries, struct io_uring_params *params)
......@@ -195,20 +226,62 @@ evsys_io_uring_enter (int fd, unsigned to_submit, unsigned min_complete, unsigne
#define EV_SQES ((struct io_uring_sqe *) iouring_sqes)
#define EV_CQES ((struct io_uring_cqe *)((char *)iouring_cq_ring + iouring_cq_cqes))
/* hand all queued sqes (iouring_to_submit) to the kernel via evsys_io_uring_enter. */
/* IORING_ENTER_GETEVENTS (with min_complete 1) is only requested for a positive timeout. */
/* the loop release/acquire callbacks bracket the call, and the submit counter is reset */
/* regardless of the outcome. returns the raw result of evsys_io_uring_enter. */
inline_speed
int
iouring_enter (EV_P_ ev_tstamp timeout)
{
  /* depends on the argument only, so it can be computed before releasing the loop */
  unsigned enter_flags = timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0;
  int rc;

  EV_RELEASE_CB;

  rc = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1, enter_flags, 0, 0);

  /* a non-negative result is expected to equal the number of sqes we handed in */
  assert (("libev: io_uring_enter did not consume all sqes", (rc < 0 || rc == iouring_to_submit)));

  iouring_to_submit = 0;

  EV_ACQUIRE_CB;

  return rc;
}
/* TODO: can we move things around so we don't need this forward-reference? */
static void
iouring_poll (EV_P_ ev_tstamp timeout);
static
struct io_uring_sqe *
iouring_sqe_get (EV_P)
{
unsigned tail = EV_SQ_VAR (tail);
unsigned tail;
if (tail + 1 - EV_SQ_VAR (head) > EV_SQ_VAR (ring_entries))
for (;;)
{
/* queue full, flush */
evsys_io_uring_enter (iouring_fd, iouring_to_submit, 0, 0, 0, 0);
iouring_to_submit = 0;
tail = EV_SQ_VAR (tail);
if (ecb_expect_true (tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)))
break; /* whats the problem, we have free sqes */
/* queue full, need to flush and possibly handle some events */
#if EV_FEATURE_CODE
/* first we ask the kernel nicely, most often this frees up some sqes */
int res = iouring_enter (EV_A_ EV_TS_CONST (0.));
ECB_MEMORY_FENCE_ACQUIRE; /* better safe than sorry */
if (res >= 0)
continue; /* yes, it worked, try again */
#endif
/* some problem, possibly EBUSY - do the full poll and let it handle any issues */
iouring_poll (EV_A_ EV_TS_CONST (0.));
/* iouring_poll should have done ECB_MEMORY_FENCE_ACQUIRE for us */
}
assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));
/*assert (("libev: io_uring queue full after flush", tail + 1 - EV_SQ_VAR (head) <= EV_SQ_VAR (ring_entries)));*/
return EV_SQES + (tail & EV_SQ_VAR (ring_mask));
}
......@@ -238,12 +311,6 @@ iouring_tfd_cb (EV_P_ struct ev_io *w, int revents)
iouring_tfd_to = EV_TSTAMP_HUGE;
}
static void
iouring_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
epoll_poll (EV_A_ 0);
}
/* called for full and partial cleanup */
ecb_cold
static int
......@@ -256,8 +323,11 @@ iouring_internal_destroy (EV_P)
if (iouring_cq_ring != MAP_FAILED) munmap (iouring_cq_ring, iouring_cq_ring_size);
if (iouring_sqes != MAP_FAILED) munmap (iouring_sqes , iouring_sqes_size );
if (ev_is_active (&iouring_epoll_w)) ev_ref (EV_A); ev_io_stop (EV_A_ &iouring_epoll_w);
if (ev_is_active (&iouring_tfd_w )) ev_ref (EV_A); ev_io_stop (EV_A_ &iouring_tfd_w );
if (ev_is_active (&iouring_tfd_w))
{
ev_ref (EV_A);
ev_io_stop (EV_A_ &iouring_tfd_w);
}
}
ecb_cold
......@@ -273,6 +343,9 @@ iouring_internal_init (EV_P)
iouring_cq_ring = MAP_FAILED;
iouring_sqes = MAP_FAILED;
if (!have_monotonic) /* cannot really happen, but what if11 */
return -1;
for (;;)
{
iouring_fd = evsys_io_uring_setup (iouring_entries, &params);
......@@ -283,6 +356,11 @@ iouring_internal_init (EV_P)
if (errno != EINVAL)
return -1; /* we failed */
#if TODO
      /* disabled for now: bail out unless the kernel reports every feature bit listed below. */
      /* the names must match the IORING_FEAT_* #defines of this file, otherwise */
      /* enabling TODO would fail to compile on an undeclared identifier. */
      if ((~params.features) & (IORING_FEAT_NODROP | IORING_FEAT_SINGLE_MMAP | IORING_FEAT_SUBMIT_STABLE))
        return -1; /* we require the above features */
#endif
/* EINVAL: lots of possible reasons, but maybe
* it is because we hit the unqueryable hardcoded size limit
*/
......@@ -344,14 +422,7 @@ iouring_fork (EV_P)
while (iouring_internal_init (EV_A) < 0)
ev_syserr ("(libev) io_uring_setup");
/* forking epoll should also effectively unregister all fds from the backend */
epoll_fork (EV_A);
/* epoll_fork already did this. hopefully */
/*fd_rearm_all (EV_A);*/
ev_io_stop (EV_A_ &iouring_epoll_w);
ev_io_set (EV_A_ &iouring_epoll_w, backend_fd, EV_READ);
ev_io_start (EV_A_ &iouring_epoll_w);
fd_rearm_all (EV_A);
ev_io_stop (EV_A_ &iouring_tfd_w);
ev_io_set (EV_A_ &iouring_tfd_w, iouring_tfd, EV_READ);
......@@ -363,22 +434,19 @@ iouring_fork (EV_P)
static void
iouring_modify (EV_P_ int fd, int oev, int nev)
{
if (ecb_expect_false (anfds [fd].eflags))
{
/* we handed this fd over to epoll, so undo this first */
/* we do it manually because the optimisations on epoll_modify won't do us any good */
epoll_ctl (iouring_fd, EPOLL_CTL_DEL, fd, 0);
anfds [fd].eflags = 0;
oev = 0;
}
if (oev)
{
/* we assume the sqe's are all "properly" initialised */
struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
sqe->opcode = IORING_OP_POLL_REMOVE;
sqe->fd = fd;
sqe->user_data = -1;
/* Jens Axboe notified me that user_data is not what is documented, but is
* some kind of unique ID that has to match, otherwise the request cannot
* be removed. Since we don't *really* have that, we pass in the old
* generation counter - if that fails, too bad, it will hopefully be removed
* at close time and then be ignored. */
sqe->addr = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
sqe->user_data = (uint64_t)-1;
iouring_sqe_submit (EV_A_ sqe);
/* increment generation counter to avoid handling old events */
......@@ -390,6 +458,7 @@ iouring_modify (EV_P_ int fd, int oev, int nev)
struct io_uring_sqe *sqe = iouring_sqe_get (EV_A);
sqe->opcode = IORING_OP_POLL_ADD;
sqe->fd = fd;
sqe->addr = 0;
sqe->user_data = (uint32_t)fd | ((__u64)(uint32_t)anfds [fd].egen << 32);
sqe->poll_events =
(nev & EV_READ ? POLLIN : 0)
......@@ -429,9 +498,9 @@ iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe)
uint32_t gen = cqe->user_data >> 32;
int res = cqe->res;
/* ignore fd removal events, if there are any. TODO: verify */
if (cqe->user_data == (__u64)-1)
abort ();//D
/* user_data -1 is a remove that we are not atm. interested in */
if (cqe->user_data == (uint64_t)-1)
return;
assert (("libev: io_uring fd must be in-bounds", fd >= 0 && fd < anfdmax));
......@@ -442,23 +511,16 @@ iouring_process_cqe (EV_P_ struct io_uring_cqe *cqe)
*/
/* ignore event if generation doesn't match */
/* other than skipping removal events, */
/* this should actually be very rare */
if (ecb_expect_false (gen != (uint32_t)anfds [fd].egen))
return;
if (ecb_expect_false (res < 0))
{
if (res == -EINVAL)
{
/* we assume this error code means the fd/poll combination is buggy
* and fall back to epoll.
* this error code might also indicate a bug, but the kernel doesn't
* distinguish between those two conditions, so... sigh...
*/
/*TODO: EINVAL handling (was something failed with this fd)*/
epoll_modify (EV_A_ fd, 0, anfds [fd].events);
}
else if (res == -EBADF)
if (res == -EBADF)
{
assert (("libev: event loop rejected bad fd", res != -EBADF));
fd_kill (EV_A_ fd);
......@@ -494,7 +556,7 @@ iouring_overflow (EV_P)
/* we have two options, resize the queue (by tearing down
* everything and recreating it, or living with it
* and polling.
* we implement this by resizing tghe queue, and, if that fails,
* we implement this by resizing the queue, and, if that fails,
* we just recreate the state on every failure, which
* kind of is a very inefficient poll.
* one danger is, due to the bias toward lower fds,
......@@ -516,12 +578,12 @@ iouring_overflow (EV_P)
/* we hit the kernel limit, we should fall back to something else.
* we can either poll() a few times and hope for the best,
* poll always, or switch to epoll.
* since we use epoll anyways, go epoll.
* TODO: is this necessary with newer kernels?
*/
iouring_internal_destroy (EV_A);
/* this should make it so that on return, we don'T call any uring functions */
/* this should make it so that on return, we don't call any uring functions */
iouring_to_submit = 0;
for (;;)
......@@ -572,7 +634,11 @@ static void
iouring_poll (EV_P_ ev_tstamp timeout)
{
/* if we have events, no need for extra syscalls, but we might have to queue events */
if (iouring_handle_cq (EV_A))
/* we also clear the timeout if there are outstanding fdchanges */
/* the latter should only happen if both the sq and cq are full, most likely */
/* because we have a lot of event sources that immediately complete */
/* TODO: fdchangecnt is always 0 because fd_reify does not have two buffers yet */
if (iouring_handle_cq (EV_A) || fdchangecnt)
timeout = EV_TS_CONST (0.);
else
/* no events, so maybe wait for some */
......@@ -581,19 +647,13 @@ iouring_poll (EV_P_ ev_tstamp timeout)
/* only enter the kernel if we have something to submit, or we need to wait */
if (timeout || iouring_to_submit)
{
int res;
EV_RELEASE_CB;
res = evsys_io_uring_enter (iouring_fd, iouring_to_submit, 1,
timeout > EV_TS_CONST (0.) ? IORING_ENTER_GETEVENTS : 0, 0, 0);
iouring_to_submit = 0;
EV_ACQUIRE_CB;
int res = iouring_enter (EV_A_ timeout);
if (ecb_expect_false (res < 0))
if (errno == EINTR)
/* ignore */;
else if (errno == EBUSY)
/* cq full, cannot submit - should be rare because we flush the cq first, so simply ignore */;
else
ev_syserr ("(libev) iouring setup");
else
......@@ -605,9 +665,6 @@ inline_size
int
iouring_init (EV_P_ int flags)
{
if (!epoll_init (EV_A_ 0))
return 0;
iouring_entries = IOURING_INIT_ENTRIES;
iouring_max_entries = 0;
......@@ -617,15 +674,8 @@ iouring_init (EV_P_ int flags)
return 0;
}
ev_io_init (&iouring_epoll_w, iouring_epoll_cb, backend_fd, EV_READ);
ev_set_priority (&iouring_epoll_w, EV_MAXPRI);
ev_io_init (&iouring_tfd_w, iouring_tfd_cb, iouring_tfd, EV_READ);
ev_set_priority (&iouring_tfd_w, EV_MAXPRI);
ev_io_start (EV_A_ &iouring_epoll_w);
ev_unref (EV_A); /* watcher should not keep loop alive */
ev_set_priority (&iouring_tfd_w, EV_MINPRI);
ev_io_start (EV_A_ &iouring_tfd_w);
ev_unref (EV_A); /* watcher should not keep loop alive */
......@@ -640,6 +690,5 @@ void
iouring_destroy (EV_P)
{
iouring_internal_destroy (EV_A);
epoll_destroy (EV_A);
}
......@@ -270,8 +270,7 @@ linuxaio_modify (EV_P_ int fd, int oev, int nev)
++anfd->egen;
}
iocb->io.aio_buf =
(nev & EV_READ ? POLLIN : 0)
iocb->io.aio_buf = (nev & EV_READ ? POLLIN : 0)
| (nev & EV_WRITE ? POLLOUT : 0);
if (nev)
......
......@@ -145,7 +145,6 @@ VARx(uint32_t, iouring_cq_cqes)
VARx(ev_tstamp, iouring_tfd_to)
VARx(int, iouring_tfd)
VARx(ev_io, iouring_tfd_w)
VARx(ev_io, iouring_epoll_w)
#endif
#if EV_USE_KQUEUE || EV_GENWRAP
......
......@@ -53,7 +53,6 @@
#define iouring_cq_ring_size ((loop)->iouring_cq_ring_size)
#define iouring_cq_tail ((loop)->iouring_cq_tail)
#define iouring_entries ((loop)->iouring_entries)
#define iouring_epoll_w ((loop)->iouring_epoll_w)
#define iouring_fd ((loop)->iouring_fd)
#define iouring_max_entries ((loop)->iouring_max_entries)
#define iouring_sq_array ((loop)->iouring_sq_array)
......@@ -189,7 +188,6 @@
#undef iouring_cq_ring_size
#undef iouring_cq_tail
#undef iouring_entries
#undef iouring_epoll_w
#undef iouring_fd
#undef iouring_max_entries
#undef iouring_sq_array
......
Update the embedded libev from 4.31 to 4.33.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment