Commit a5f9a8fb authored by Rusty Russell's avatar Rusty Russell

Tim's ilog module.

parent f3505f82
/**
* ilog - Integer logarithm.
*
* ILOG_32() and ILOG_64() compute the minimum number of bits required to store
* an unsigned 32-bit or 64-bit value without any leading zero bits.
* This can also be thought of as the location of the highest set bit, with
* counting starting from one (so that 0 returns 0, 1 returns 1, and 2**31
* returns 32).
* When the value is known to be non-zero ILOGNZ_32() and ILOGNZ_64() can
* compile into as few as two instructions, one of which may get optimized out
* later.
* STATIC_ILOG_32 and STATIC_ILOG_64 allow computation on compile-time
* constants, so other compile-time constants can be derived from them.
*
* Example:
* #include <stdio.h>
* #include <limits.h>
* #include <ccan/ilog/ilog.h>
*
* int main(void){
* int i;
* printf("ILOG_32(0x%08X)=%i\n",0,ILOG_32(0));
* for(i=1;i<=STATIC_ILOG_32(USHRT_MAX);i++){
* uint32_t v;
* v=(uint32_t)1U<<i-1;
* //Here we know v is non-zero, so we can use ILOGNZ_32().
* printf("ILOG_32(0x%08X)=%i\n",v,ILOGNZ_32(v));
* }
* return 0;
* }
*
* License: LGPL (v2 or later)
*/
#include <string.h>
#include "config.h"
int main(int _argc,const char *_argv[]){
/*Expect exactly one argument.*/
if(_argc!=2)return 1;
if(strcmp(_argv[1],"depends")==0){
/*PRINTF-CCAN-PACKAGES-YOU-NEED-ONE-PER-LINE-IF-ANY*/
return 0;
}
return 1;
}
/*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).*/
#include "ilog.h"
#include <limits.h>
/*The fastest fallback strategy for platforms with fast multiplication appears
to be based on de Bruijn sequences~\cite{LP98}.
Tests confirmed this to be true even on an ARM11, where it is actually faster
than using the native clz instruction.
Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where
multiplication or table lookups are too expensive.
@UNPUBLISHED{LP98,
author="Charles E. Leiserson and Harald Prokop",
title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
month=Jun,
year=1998,
note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
}*/
#if !defined(ILOG_NODEBRUIJN)&& \
!defined(CLZ32)||!defined(CLZ64)&&LONG_MAX<9223372036854775807LL
static const unsigned char DEBRUIJN_IDX32[32]={
0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
};
#endif
int ilog32(uint32_t _v){
#if defined(CLZ32)
return (CLZ32_OFFS-CLZ32(_v))&-!!_v;
#else
/*On a Pentium M, this branchless version tested as the fastest version without
multiplications on 1,000,000,000 random 32-bit integers, edging out a
similar version with branches, and a 256-entry LUT version.*/
# if defined(ILOG_NODEBRUIJN)
int ret;
int m;
ret=_v>0;
m=(_v>0xFFFFU)<<4;
_v>>=m;
ret|=m;
m=(_v>0xFFU)<<3;
_v>>=m;
ret|=m;
m=(_v>0xFU)<<2;
_v>>=m;
ret|=m;
m=(_v>3)<<1;
_v>>=m;
ret|=m;
ret+=_v>1;
return ret;
/*This de Bruijn sequence version is faster if you have a fast multiplier.*/
# else
int ret;
ret=_v>0;
_v|=_v>>1;
_v|=_v>>2;
_v|=_v>>4;
_v|=_v>>8;
_v|=_v>>16;
_v=(_v>>1)+1;
ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F];
return ret;
# endif
#endif
}
int ilog64(uint64_t _v){
#if defined(CLZ64)
return (CLZ64_OFFS-CLZ64(_v))&-!!_v;
#else
# if defined(ILOG_NODEBRUIJN)
uint32_t v;
int ret;
int m;
ret=_v>0;
m=(_v>0xFFFFFFFFU)<<5;
v=(uint32_t)(_v>>m);
ret|=m;
m=(v>0xFFFFU)<<4;
v>>=m;
ret|=m;
m=(v>0xFFU)<<3;
v>>=m;
ret|=m;
m=(v>0xFU)<<2;
v>>=m;
ret|=m;
m=(v>3)<<1;
v>>=m;
ret|=m;
ret+=v>1;
return ret;
# else
/*If we don't have a 64-bit word, split it into two 32-bit halves.*/
# if LONG_MAX<9223372036854775807LL
uint32_t v;
int ret;
int m;
ret=_v>0;
m=(_v>0xFFFFFFFFU)<<5;
v=(uint32_t)(_v>>m);
ret|=m;
v|=v>>1;
v|=v>>2;
v|=v>>4;
v|=v>>8;
v|=v>>16;
v=(v>>1)+1;
ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F];
return ret;
/*Otherwise do it in one 64-bit operation.*/
# else
static const unsigned char DEBRUIJN_IDX64[64]={
0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
};
int ret;
ret=_v>0;
_v|=_v>>1;
_v|=_v>>2;
_v|=_v>>4;
_v|=_v>>8;
_v|=_v>>16;
_v|=_v>>32;
_v=(_v>>1)+1;
ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F];
return ret;
# endif
# endif
#endif
}
#if !defined(_ilog_H)
# define _ilog_H (1)
# include <stdint.h>
# ifdef __GNUC_PREREQ
/*Tag our functions as idempotent to aid optimization, if possible.*/
# if __GNUC_PREREQ(2,5)
# define IDEMPOTENT __attribute__((const))
# endif
# if __GNUC_PREREQ(3,4)
# include <limits.h>
/*Note the casts to (int) below: this prevents CLZ{32|64}_OFFS from "upgrading"
the type of an entire expression to an (unsigned) size_t.*/
# if INT_MAX>=2147483647
# define CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
# define CLZ32(_x) (__builtin_clz(_x))
# elif LONG_MAX>=2147483647L
# define CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
# define CLZ32(_x) (__builtin_clzl(_x))
# endif
# if INT_MAX>=9223372036854775807LL
# define CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
# define CLZ64(_x) (__builtin_clz(_x))
# elif LONG_MAX>=9223372036854775807LL
# define CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
# define CLZ64(_x) (__builtin_clzl(_x))
# elif LLONG_MAX>=9223372036854775807LL
# define CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
# define CLZ64(_x) (__builtin_clzll(_x))
# endif
# endif
# endif
/*If you have some other compiler which defines its own clz-style builtin,
implement a check for it here.*/
# if !defined(IDEMPOTENT)
# define IDEMPOTENT
# endif
/**
* ilog32 - Integer binary logarithm of a 32-bit value.
* @_v: A 32-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* The ILOG_32() or ILOGNZ_32() macros may be able to use a builtin function
* instead, which should be faster.
*/
int ilog32(uint32_t _v)IDEMPOTENT;
/**
* ilog64 - Integer binary logarithm of a 64-bit value.
* @_v: A 64-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* The ILOG_64() or ILOGNZ_64() macros may be able to use a builtin function
* instead, which should be faster.
*/
int ilog64(uint64_t _v)IDEMPOTENT;
# undef IDEMPOTENT
# if defined(CLZ32)
/**
* ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
* @_v: A non-zero 32-bit value.
* Returns floor(log2(_v))+1.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* If _v is zero, the return value is undefined; use ILOG_32() instead.
*/
# define ILOGNZ_32(_v) (CLZ32_OFFS-CLZ32(_v))
/**
* ILOG_32 - Integer binary logarithm of a 32-bit value.
* @_v: A 32-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
*/
# define ILOG_32(_v) (ILOGNZ_32(_v)&-!!(_v))
# else
# define ILOGNZ_32(_v) (ilog32(_v))
# define ILOG_32(_v) (ilog32(_v))
# endif
# if defined(CLZ64)
/**
* ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
* @_v: A non-zero 64-bit value.
* Returns floor(log2(_v))+1.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* If _v is zero, the return value is undefined; use ILOG_64() instead.
*/
# define ILOGNZ_64(_v) (CLZ64_OFFS-CLZ64(_v))
/**
* ILOG_64 - Integer binary logarithm of a 64-bit value.
* @_v: A 64-bit value.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
*/
# define ILOG_64(_v) (ILOGNZ_64(_v)&-!!(_v))
# else
# define ILOGNZ_64(_v) (ilog64(_v))
# define ILOG_64(_v) (ilog64(_v))
# endif
# define STATIC_ILOG0(_v) (!!(_v))
# define STATIC_ILOG1(_v) (((_v)&0x2)?2:STATIC_ILOG0(_v))
# define STATIC_ILOG2(_v) (((_v)&0xC)?2+STATIC_ILOG1((_v)>>2):STATIC_ILOG1(_v))
# define STATIC_ILOG3(_v) \
(((_v)&0xF0)?4+STATIC_ILOG2((_v)>>4):STATIC_ILOG2(_v))
# define STATIC_ILOG4(_v) \
(((_v)&0xFF00)?8+STATIC_ILOG3((_v)>>8):STATIC_ILOG3(_v))
# define STATIC_ILOG5(_v) \
(((_v)&0xFFFF0000)?16+STATIC_ILOG4((_v)>>16):STATIC_ILOG4(_v))
# define STATIC_ILOG6(_v) \
(((_v)&0xFFFFFFFF00000000ULL)?32+STATIC_ILOG5((_v)>>32):STATIC_ILOG5(_v))
/**
* STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
* @_v: A non-negative 32-bit constant.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* This macro is suitable for evaluation at compile time, but it should not be
* used on values that can change at runtime, as it operates via exhaustive
* search.
*/
# define STATIC_ILOG_32(_v) (STATIC_ILOG5((uint32_t)(_v)))
/**
* STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
* @_v: A non-negative 64-bit constant.
* Returns floor(log2(_v))+1, or 0 if _v==0.
* This is the number of bits that would be required to represent _v in two's
* complement notation with all of the leading zeros stripped.
* This macro is suitable for evaluation at compile time, but it should not be
* used on values that can change at runtime, as it operates via exhaustive
* search.
*/
# define STATIC_ILOG_64(_v) (STATIC_ILOG6((uint64_t)(_v)))
#endif
#include <stdio.h>
#include "ilog/ilog.h"
#include "tap/tap.h"
#if defined(__GNUC_PREREQ)
# if __GNUC_PREREQ(4,2)
# pragma GCC diagnostic ignored "-Wparentheses"
# endif
#endif
/*Dead simple (but slow) versions to compare against.*/
static int test_ilog32(uint32_t _v){
int ret;
for(ret=0;_v;ret++)_v>>=1;
return ret;
}
static int test_ilog64(uint64_t _v){
int ret;
for(ret=0;_v;ret++)_v>>=1;
return ret;
}
#define NTRIALS (64)
int main(int _argc,const char *_argv[]){
int nmatches;
int i;
int j;
/*This is how many tests you plan to run.*/
plan_tests(2);
nmatches=0;
for(i=0;i<=32;i++){
uint32_t v;
/*Test each bit in turn (and 0).*/
v=i?(uint32_t)1U<<i-1:0;
for(j=0;j<NTRIALS;j++){
int l;
l=test_ilog32(v);
if(ILOG_32(v)!=l){
fprintf(stderr,"ILOG_32(0x%08lX): %i != %i\n",(long)v,ILOG_32(v),l);
}
else nmatches++;
if(ilog32(v)!=l){
fprintf(stderr,"ilog32(0x%08lX): %i != %i\n",(long)v,ilog32(v),l);
}
else nmatches++;
if(STATIC_ILOG_32(v)!=l){
fprintf(stderr,"STATIC_ILOG_32(0x%08lX): %i != %i\n",
(long)v,STATIC_ILOG_32(v),l);
}
else nmatches++;
/*Also try a few more pseudo-random values with at most the same number
of bits.*/
v=1103515245U*v+12345U&0xFFFFFFFFU>>(33-i>>1)>>(32-i>>1);
}
}
ok1(nmatches==3*(32+1)*NTRIALS);
nmatches=0;
for(i=0;i<=64;i++){
uint64_t v;
/*Test each bit in turn (and 0).*/
v=i?(uint64_t)1U<<i-1:0;
for(j=0;j<NTRIALS;j++){
int l;
l=test_ilog64(v);
if(ILOG_64(v)!=l){
fprintf(stderr,"ILOG_64(0x%016llX): %i != %i\n",
(long long)v,ILOG_64(v),l);
}
else nmatches++;
if(ilog64(v)!=l){
fprintf(stderr,"ilog64(0x%016llX): %i != %i\n",
(long long)v,ilog64(v),l);
}
else nmatches++;
if(STATIC_ILOG_64(v)!=l){
fprintf(stderr,"STATIC_ILOG_64(0x%016llX): %i != %i\n",
(long long)v,STATIC_ILOG_64(v),l);
}
else nmatches++;
/*Also try a few more pseudo-random values with at most the same number
of bits.*/
v=(uint64_t)(2862933555777941757ULL*v+3037000493ULL
&0xFFFFFFFFFFFFFFFFULL>>(65-i>>1)>>(64-i>>1));
}
}
ok1(nmatches==3*(64+1)*NTRIALS);
return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment