Commit 7ae6b2e6 authored by William Ahern's avatar William Ahern

Merge branch 'portable_bitops' of git://github.com/nmathewson/timeout into...

Merge branch 'portable_bitops' of git://github.com/nmathewson/timeout into nmathewson-portable_bitops
parents 6f9379e9 d9d3e5dd
#include <stdint.h>
#ifdef _MSC_VER
#include <intrin.h> /* _BitScanForward, _BitScanReverse */
#endif
/* First define ctz and clz functions; these are compiler-dependent if
* you want them to be fast. */
#if defined(__GNUC__) && !defined(TIMEOUT_DISABLE_GNUC_BITOPS)
/* On GCC and clang and some others, we can use __builtin functions. They
* are not defined for n==0, but timeout.s never calls them with n==0. */
#define ctz64(n) __builtin_ctzll(n)
#define clz64(n) __builtin_clzll(n)
#if LONG_BITS == 32
#define ctz32(n) __builtin_ctzl(n)
#define clz32(n) __builtin_clzl(n)
#else
#define ctz32(n) __builtin_ctz(n)
#define clz32(n) __builtin_clz(n)
#endif
#elif defined(_MSC_VER) && !defined(TIMEOUT_DISABLE_MSVC_BITOPS)
/* On MSVC, we have these handy functions. We can ignore their return
* values, since we will never supply val == 0. */
static __inline int ctz32(unsigned long val)
{
DWORD zeros = 0;
_BitScanForward(&zeros, val);
return zeros;
}
static __inline int clz32(unsigned long val)
{
DWORD zeros = 0;
_BitScanReverse(&zeros, val);
return zeros;
}
#ifdef _WIN64
/* According to the documentation, these only exist on Win64. */
static __inline int ctz64(uint64_t val)
{
DWORD zeros = 0;
_BitScanForward64(&zeros, val);
return zeros;
}
static __inline int clz64(uint64_t val)
{
DWORD zeros = 0;
_BitScanReverse64(&zeros, val);
return zeros;
}
#else
static __inline int ctz64(uint64_t val)
{
uint32_t lo = (uint32_t) val;
uint32_t hi = (uint32_t) (val >> 32);
return lo ? ctz32(lo) : 32 + ctz32(hi);
}
static __inline int clz64(uint64_t val)
{
uint32_t lo = (uint32_t) val;
uint32_t hi = (uint32_t) (val >> 32);
return hi ? clz32(hi) : 32 + clz32(lo);
}
#endif
/* End of MSVC case. */
#else
/* TODO: There are more clever ways to do this in the generic case. */
#define process_(one, cz_bits, bits) \
if (x < ( one << (cz_bits - bits))) { rv += bits; x <<= bits; }
#define process64(bits) process_((UINT64_C(1)), 64, (bits))
static inline int clz64(uint64_t x)
{
int rv = 0;
process64(32);
process64(16);
process64(8);
process64(4);
process64(2);
process64(1);
return rv;
}
#define process32(bits) process_((UINT32_C(1)), 32, (bits))
static inline int clz32(uint32_t x)
{
int rv = 0;
process32(16);
process32(8);
process32(4);
process32(2);
process32(1);
return rv;
}
#undef process_
#undef process32
#undef process64
#define process_(one, bits) \
if ((x & ((one << (bits))-1)) == 0) { rv += bits; x >>= bits; }
#define process64(bits) process_((UINT64_C(1)), bits)
static inline int ctz64(uint64_t x)
{
int rv = 0;
process64(32);
process64(16);
process64(8);
process64(4);
process64(2);
process64(1);
return rv;
}
#define process32(bits) process_((UINT32_C(1)), bits)
static inline int ctz32(uint32_t x)
{
int rv = 0;
process32(16);
process32(8);
process32(4);
process32(2);
process32(1);
return rv;
}
#undef process32
#undef process64
#undef process_
/* End of generic case */
#endif /* End of defining ctz */
#ifdef TEST_BITOPS
#include <stdio.h>
#include <stdlib.h>
static uint64_t testcases[] = {
13371337 * 10,
100,
385789752,
82574,
(((uint64_t)1)<<63) + (((uint64_t)1)<<31) + 10101
};
static int
naive_clz(int bits, uint64_t v)
{
int r = 0;
uint64_t bit = ((uint64_t)1) << (bits-1);
while (bit && 0 == (v & bit)) {
r++;
bit >>= 1;
}
/* printf("clz(%d,%lx) -> %d\n", bits, v, r); */
return r;
}
static int
naive_ctz(int bits, uint64_t v)
{
int r = 0;
uint64_t bit = 1;
while (bit && 0 == (v & bit)) {
r++;
bit <<= 1;
if (r == bits)
break;
}
/* printf("ctz(%d,%lx) -> %d\n", bits, v, r); */
return r;
}
static int
check(uint64_t vv)
{
uint32_t v32 = (uint32_t) vv;
if (vv == 0)
return 1; /* c[tl]z64(0) is undefined. */
if (ctz64(vv) != naive_ctz(64, vv)) {
printf("mismatch with ctz64: %d\n", ctz64(vv));
exit(1);
return 0;
}
if (clz64(vv) != naive_clz(64, vv)) {
printf("mismatch with clz64: %d\n", clz64(vv));
exit(1);
return 0;
}
if (v32 == 0)
return 1; /* c[lt]z(0) is undefined. */
if (ctz32(v32) != naive_ctz(32, v32)) {
printf("mismatch with ctz32: %d\n", ctz32(v32));
exit(1);
return 0;
}
if (clz32(v32) != naive_clz(32, v32)) {
printf("mismatch with clz32: %d\n", clz32(v32));
exit(1);
return 0;
}
return 1;
}
int
main(int c, char **v)
{
unsigned int i;
const unsigned int n = sizeof(testcases)/sizeof(testcases[0]);
int result = 0;
for (i = 0; i <= 63; ++i) {
uint64_t x = 1 << i;
if (!check(x))
result = 1;
--x;
if (!check(x))
result = 1;
}
for (i = 0; i < n; ++i) {
if (! check(testcases[i]))
result = 1;
}
if (result) {
puts("FAIL");
} else {
puts("OK");
}
return result;
}
#endif
...@@ -122,17 +122,25 @@ ...@@ -122,17 +122,25 @@
#define WHEEL_MASK (WHEEL_LEN - 1) #define WHEEL_MASK (WHEEL_LEN - 1)
#define TIMEOUT_MAX ((TIMEOUT_C(1) << (WHEEL_BIT * WHEEL_NUM)) - 1) #define TIMEOUT_MAX ((TIMEOUT_C(1) << (WHEEL_BIT * WHEEL_NUM)) - 1)
#include "bitops.c"
#if WHEEL_BIT == 6 #if WHEEL_BIT == 6
#define ctz(n) ctz64(n)
#define clz(n) clz64(n)
#define fls(n) ((int)(64 - clz64(n)))
#else
#define ctz(n) ctz32(n)
#define clz(n) clz32(n)
#define fls(n) ((int)(32 - clz32(n)))
#endif
#if WHEEL_BIT == 6
#define WHEEL_C(n) UINT64_C(n) #define WHEEL_C(n) UINT64_C(n)
#define WHEEL_PRIu PRIu64 #define WHEEL_PRIu PRIu64
#define WHEEL_PRIx PRIx64 #define WHEEL_PRIx PRIx64
typedef uint64_t wheel_t; typedef uint64_t wheel_t;
#define ctz(n) __builtin_ctzll(n)
#define fls(n) ((int)(sizeof (long long) * CHAR_BIT) - __builtin_clzll(n))
#elif WHEEL_BIT == 5 #elif WHEEL_BIT == 5
#define WHEEL_C(n) UINT32_C(n) #define WHEEL_C(n) UINT32_C(n)
...@@ -141,9 +149,6 @@ typedef uint64_t wheel_t; ...@@ -141,9 +149,6 @@ typedef uint64_t wheel_t;
typedef uint32_t wheel_t; typedef uint32_t wheel_t;
#define ctz(n) __builtin_ctzl(n)
#define fls(n) ((int)(sizeof (long) * CHAR_BIT) - __builtin_clzl(n))
#elif WHEEL_BIT == 4 #elif WHEEL_BIT == 4
#define WHEEL_C(n) UINT16_C(n) #define WHEEL_C(n) UINT16_C(n)
...@@ -152,9 +157,6 @@ typedef uint32_t wheel_t; ...@@ -152,9 +157,6 @@ typedef uint32_t wheel_t;
typedef uint16_t wheel_t; typedef uint16_t wheel_t;
#define ctz(n) __builtin_ctz(n)
#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n))
#elif WHEEL_BIT == 3 #elif WHEEL_BIT == 3
#define WHEEL_C(n) UINT8_C(n) #define WHEEL_C(n) UINT8_C(n)
...@@ -163,9 +165,6 @@ typedef uint16_t wheel_t; ...@@ -163,9 +165,6 @@ typedef uint16_t wheel_t;
typedef uint8_t wheel_t; typedef uint8_t wheel_t;
#define ctz(n) __builtin_ctz(n)
#define fls(n) ((int)(sizeof (int) * CHAR_BIT) - __builtin_clz(n))
#else #else
#error invalid WHEEL_BIT value #error invalid WHEEL_BIT value
#endif #endif
...@@ -300,6 +299,7 @@ static inline reltime_t timeout_rem(struct timeouts *T, struct timeout *to) { ...@@ -300,6 +299,7 @@ static inline reltime_t timeout_rem(struct timeouts *T, struct timeout *to) {
static inline int timeout_wheel(timeout_t timeout) { static inline int timeout_wheel(timeout_t timeout) {
/* must be called with timeout != 0, so fls input is nonzero */
return (fls(MIN(timeout, TIMEOUT_MAX)) - 1) / WHEEL_BIT; return (fls(MIN(timeout, TIMEOUT_MAX)) - 1) / WHEEL_BIT;
} /* timeout_wheel() */ } /* timeout_wheel() */
...@@ -322,6 +322,11 @@ static void timeouts_sched(struct timeouts *T, struct timeout *to, timeout_t exp ...@@ -322,6 +322,11 @@ static void timeouts_sched(struct timeouts *T, struct timeout *to, timeout_t exp
if (expires > T->curtime) { if (expires > T->curtime) {
rem = timeout_rem(T, to); rem = timeout_rem(T, to);
/* rem is nonzero since:
* rem == timeout_rem(T,to),
* == to->expires - T->curtime
* and above we have expires > T->curtime.
*/
wheel = timeout_wheel(rem); wheel = timeout_wheel(rem);
slot = timeout_slot(wheel, to->expires); slot = timeout_slot(wheel, to->expires);
...@@ -417,6 +422,7 @@ TIMEOUT_PUBLIC void timeouts_update(struct timeouts *T, abstime_t curtime) { ...@@ -417,6 +422,7 @@ TIMEOUT_PUBLIC void timeouts_update(struct timeouts *T, abstime_t curtime) {
} }
while (pending & T->pending[wheel]) { while (pending & T->pending[wheel]) {
/* ctz input cannot be zero: loop condition. */
int slot = ctz(pending & T->pending[wheel]); int slot = ctz(pending & T->pending[wheel]);
TAILQ_CONCAT(&todo, &T->wheel[wheel][slot], tqe); TAILQ_CONCAT(&todo, &T->wheel[wheel][slot], tqe);
T->pending[wheel] &= ~(UINT64_C(1) << slot); T->pending[wheel] &= ~(UINT64_C(1) << slot);
...@@ -493,6 +499,8 @@ static timeout_t timeouts_int(struct timeouts *T) { ...@@ -493,6 +499,8 @@ static timeout_t timeouts_int(struct timeouts *T) {
if (T->pending[wheel]) { if (T->pending[wheel]) {
slot = WHEEL_MASK & (T->curtime >> (wheel * WHEEL_BIT)); slot = WHEEL_MASK & (T->curtime >> (wheel * WHEEL_BIT));
/* ctz input cannot be zero: T->pending[wheel] is
* nonzero, so rotr() is nonzero. */
_timeout = (ctz(rotr(T->pending[wheel], slot)) + !!wheel) << (wheel * WHEEL_BIT); _timeout = (ctz(rotr(T->pending[wheel], slot)) + !!wheel) << (wheel * WHEEL_BIT);
/* +1 to higher order wheels as those timeouts are one rotation in the future (otherwise they'd be on a lower wheel or expired) */ /* +1 to higher order wheels as those timeouts are one rotation in the future (otherwise they'd be on a lower wheel or expired) */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment