Commit 5c559e7d authored by Rusty Russell

htable, strset: benchmarking tools.

This lets us compare hash table vs. strset vs. the example
implementation of critbit trees.

cbspeed (example critbit tree implementation), 100 runs, nanoseconds per operation, min-max(avg):
#01: Initial insert:   236-245(237)
#02: Initial lookup (match):   180-186(180)
#03: Initial lookup (miss):   171-185(172)
#04: Initial lookup (random):   441-457(444)
#05: Initial delete all:   127-132(128)
#06: Initial re-inserting:   219-225(220)
#07: Deleting first half:   101-104(102)
#08: Adding (a different) half:   158-162(159)
#09: Lookup after half-change (match):   202-207(203)
#10: Lookup after half-change (miss):   217-222(218)
#11: Churn 1:   297-302(299)
#12: Churn 2:   297-305(300)
#13: Churn 3:   301-308(303)
#14: Post-Churn lookup (match):   189-195(190)
#15: Post-Churn lookup (miss):   189-193(190)
#16: Post-Churn lookup (random):   499-513(503)

speed (strset), 100 runs, nanoseconds per operation, min-max(avg):
#01: Initial insert:   211-218(212)
#02: Initial lookup (match):   161-166(162)
#03: Initial lookup (miss):   157-162(158)
#04: Initial lookup (random):   452-460(454)
#05: Initial delete all:   126-135(127)
#06: Initial re-inserting:   193-201(194)
#07: Deleting first half:   99-107(99)
#08: Adding (a different) half:   143-190(144)
#09: Lookup after half-change (match):   183-195(184)
#10: Lookup after half-change (miss):   197-203(198)
#11: Churn 1:   271-278(274)
#12: Churn 2:   280-287(282)
#13: Churn 3:   277-285(279)
#14: Post-Churn lookup (match):   171-175(171)
#15: Post-Churn lookup (miss):   174-178(175)
#16: Post-Churn lookup (random):   525-552(528)

stringspeed (hash table of strings), 100 runs, nanoseconds per operation, min-max(avg):
#01: Initial insert:   300-343(308)
#02: Initial lookup (match):   98-136(99)
#03: Initial lookup (miss):   73-102(75)
#04: Initial lookup (random):   230-282(233)
#05: Initial delete all:   66-102(69)
#06: Initial re-inserting:   62-99(64)
#07: Deleting first half:   43-52(43)
#08: Adding (a different) half:   101-156(106)
#09: Lookup after half-change (match):   114-156(120)
#10: Lookup after half-change (miss):   94-103(95)
#11: Churn 1:   98-105(99)
#12: Churn 2:   96-104(98)
#13: Churn 3:   174-184(176)
#14: Post-Churn lookup (match):   93-112(94)
#15: Post-Churn lookup (miss):   77-107(79)
#16: Post-Churn lookup (random):   229-265(232)
parent ab83de95
# Build rules for the speed and stringspeed benchmark programs.
# The commented-out CFLAGS line is the debug (-g) alternative to -O3.
CFLAGS=-Wall -Werror -O3 -I../../..
#CFLAGS=-Wall -Werror -g -I../../..
# NOTE(review): "speed" is given prerequisites twice (here with
# ../../hash.o, below with the locally built hash.o).  This first rule
# looks like the pre-change line of the diff -- confirm which is intended.
speed: speed.o ../../hash.o
all: speed stringspeed
speed: speed.o hash.o
speed.o: speed.c ../htable.h ../htable.c
# hash.o is compiled in this directory from the hash module's source.
hash.o: ../../hash/hash.c
$(CC) $(CFLAGS) -c -o $@ $<
stringspeed: stringspeed.o hash.o ../../talloc.o ../../str_talloc.o ../../grab_file.o ../../str.o ../../time.o ../../noerr.o
# NOTE(review): speed.c is listed as a prerequisite of stringspeed.o;
# presumably stringspeed.c was meant -- confirm.
stringspeed.o: speed.c ../htable.h ../htable.c
# clean removes the two executables only; the object files are left behind.
clean:
rm -f stringspeed speed
/* Simple speed tests for a hash of strings. */
#include <ccan/htable/htable_type.h>
#include <ccan/htable/htable.c>
#include <ccan/str_talloc/str_talloc.h>
#include <ccan/grab_file/grab_file.h>
#include <ccan/talloc/talloc.h>
#include <ccan/hash/hash.h>
#include <ccan/time/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
/* Number of hash_str() calls so far; main() zeroes it before some phases. */
static size_t hashcount;
/* Key extractor for the table: a stored string is its own key. */
static const char *strkey(const char *str)
{
	const char *key = str;

	return key;
}
/* Hash a NUL-terminated key (terminator excluded, seed 0), counting the
 * call in hashcount. */
static size_t hash_str(const char *key)
{
	size_t len = strlen(key);

	++hashcount;
	return hash(key, len, 0);
}
/* Equality test for the table: true when the stored string and the
 * lookup key have identical contents. */
static bool cmp(const char *obj, const char *key)
{
	return !strcmp(obj, key);
}
/* Instantiate the typed table for plain C strings, wired to strkey(),
 * hash_str() and cmp() above; main() uses the resulting
 * struct htable_str and htable_str_* functions. */
HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, str);
/**
 * normalize - average cost of one operation, in nanoseconds
 * @start: time at which the batch started
 * @stop: time at which the batch finished
 * @num: number of operations the batch is to be divided by
 *
 * Returns the time elapsed between @start and @stop divided by @num.
 * Returns 0 if @num is 0 or if @stop is earlier than @start.
 */
static size_t normalize(const struct timeval *start,
			const struct timeval *stop,
			unsigned int num)
{
	double usec;

	/* Dividing by zero would give inf/NaN, and converting that to
	 * size_t is undefined. */
	if (num == 0)
		return 0;

	/* Convert to floating point *before* scaling: tv_sec * 1000000 in
	 * integer arithmetic overflows where tv_sec is 32 bits wide. */
	usec = (double)(stop->tv_sec - start->tv_sec) * 1000000.0
		+ (double)(stop->tv_usec - start->tv_usec);

	/* A clock that stepped backwards must not become a huge size_t. */
	if (usec < 0)
		usec = 0;

	return usec / num * 1000;
}
int main(int argc, char *argv[])
{
size_t i, j, num;
struct timeval start, stop;
struct htable_str *ht;
char **words, **misswords;
words = strsplit(NULL, grab_file(NULL,
argv[1] ? argv[1] : "/usr/share/dict/words",
NULL), "\n");
ht = htable_str_new();
num = talloc_array_length(words) - 1;
printf("%zu words\n", num);
/* Append and prepend last char for miss testing. */
misswords = talloc_array(words, char *, num);
for (i = 0; i < num; i++) {
char lastc;
if (strlen(words[i]))
lastc = words[i][strlen(words[i])-1];
else
lastc = 'z';
misswords[i] = talloc_asprintf(misswords, "%c%s%c%c",
lastc, words[i], lastc, lastc);
}
printf("#01: Initial insert: ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++)
htable_str_add(ht, words[i]);
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("Bytes allocated: %zu\n",
sizeof(((struct htable *)ht)->table[0])
<< ((struct htable *)ht)->bits);
printf("#02: Initial lookup (match): ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++)
if (htable_str_get(ht, words[i]) != words[i])
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#03: Initial lookup (miss): ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++) {
if (htable_str_get(ht, misswords[i]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
/* Lookups in order are very cache-friendly for judy; try random */
printf("#04: Initial lookup (random): ");
fflush(stdout);
start = time_now();
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
if (htable_str_get(ht, words[j]) != words[j])
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
hashcount = 0;
printf("#05: Initial delete all: ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++)
if (!htable_str_del(ht, words[i]))
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#06: Initial re-inserting: ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++)
htable_str_add(ht, words[i]);
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
hashcount = 0;
printf("#07: Deleting first half: ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i+=2)
if (!htable_str_del(ht, words[i]))
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#08: Adding (a different) half: ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i+=2)
htable_str_add(ht, misswords[i]);
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#09: Lookup after half-change (match): ");
fflush(stdout);
start = time_now();
for (i = 1; i < num; i+=2)
if (htable_str_get(ht, words[i]) != words[i])
abort();
for (i = 0; i < num; i+=2) {
if (htable_str_get(ht, misswords[i]) != misswords[i])
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#10: Lookup after half-change (miss): ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i+=2)
if (htable_str_get(ht, words[i]))
abort();
for (i = 1; i < num; i+=2) {
if (htable_str_get(ht, misswords[i]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
/* Hashtables with delete markers can fill with markers over time.
* so do some changes to see how it operates in long-term. */
printf("#11: Churn 1: ");
start = time_now();
for (j = 0; j < num; j+=2) {
if (!htable_str_del(ht, misswords[j]))
abort();
if (!htable_str_add(ht, words[j]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#12: Churn 2: ");
start = time_now();
for (j = 1; j < num; j+=2) {
if (!htable_str_del(ht, words[j]))
abort();
if (!htable_str_add(ht, misswords[j]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#13: Churn 3: ");
start = time_now();
for (j = 1; j < num; j+=2) {
if (!htable_str_del(ht, misswords[j]))
abort();
if (!htable_str_add(ht, words[j]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
/* Now it's back to normal... */
printf("#14: Post-Churn lookup (match): ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++)
if (htable_str_get(ht, words[i]) != words[i])
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
printf("#15: Post-Churn lookup (miss): ");
fflush(stdout);
start = time_now();
for (i = 0; i < num; i++) {
if (htable_str_get(ht, misswords[i]))
abort();
}
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
/* Lookups in order are very cache-friendly for judy; try random */
printf("#16: Post-Churn lookup (random): ");
fflush(stdout);
start = time_now();
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
if (htable_str_get(ht, words[j]) != words[j])
abort();
stop = time_now();
printf(" %zu ns\n", normalize(&start, &stop, num));
return 0;
}
# Build rules for the cbspeed and speed benchmark programs.
# The commented-out CFLAGS line is the debug (-g) alternative to -O3.
CFLAGS=-Wall -Werror -O3 -I../../..
#CFLAGS=-Wall -Werror -g -I../../..

# "all" and "clean" do not name files; declaring them phony keeps a stray
# file with either name from suppressing the rule.
.PHONY: all clean

all: cbspeed speed

cbspeed: cbspeed.o ../../talloc.o ../../str_talloc.o ../../grab_file.o ../../str.o ../../time.o ../../noerr.o
speed: speed.o ../../talloc.o ../../str_talloc.o ../../grab_file.o ../../str.o ../../time.o ../../noerr.o

# Recipe lines must begin with a tab.
clean:
	rm -f cbspeed speed speed.o cbspeed.o
This diff is collapsed.
/* Simple speed tests using strset code.
*
* Results on my 32 bit Intel(R) Core(TM) i5 CPU M 560 @ 2.67GHz, gcc 4.5.2:
* Run 100 times, nanoseconds per operation: Min-Max(Avg)
#01: Initial insert: 212-219(214)
#02: Initial lookup (match): 161-169(162)
#03: Initial lookup (miss): 157-163(158)
#04: Initial lookup (random): 450-479(453)
#05: Initial delete all: 126-137(128)
#06: Initial re-inserting: 193-198(194)
#07: Deleting first half: 99-102(99)
#08: Adding (a different) half: 143-154(144)
#09: Lookup after half-change (match): 183-189(184)
#10: Lookup after half-change (miss): 198-212(199)
#11: Churn 1: 274-282(276)
#12: Churn 2: 279-296(282)
#13: Churn 3: 278-294(280)
#14: Post-Churn lookup (match): 170-180(171)
#15: Post-Churn lookup (miss): 175-186(176)
#16: Post-Churn lookup (random): 522-534(525)
*/
#include <ccan/str_talloc/str_talloc.h>
#include <ccan/grab_file/grab_file.h>
#include <ccan/talloc/talloc.h>
#include <ccan/time/time.h>
#include <ccan/strset/strset.c>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
/**
 * normalize - average cost of one operation, in nanoseconds
 * @start: time at which the batch started
 * @stop: time at which the batch finished
 * @num: number of operations the batch is to be divided by
 *
 * Returns the time elapsed between @start and @stop divided by @num.
 * Returns 0 if @num is 0 or if @stop is earlier than @start.
 */
static size_t normalize(const struct timeval *start,
			const struct timeval *stop,
			unsigned int num)
{
	double usec;

	/* Dividing by zero would give inf/NaN, and converting that to
	 * size_t is undefined. */
	if (num == 0)
		return 0;

	/* Convert to floating point *before* scaling: tv_sec * 1000000 in
	 * integer arithmetic overflows where tv_sec is 32 bits wide. */
	usec = (double)(stop->tv_sec - start->tv_sec) * 1000000.0
		+ (double)(stop->tv_usec - start->tv_usec);

	/* A clock that stepped backwards must not become a huge size_t. */
	if (usec < 0)
		usec = 0;

	return usec / num * 1000;
}
/* Benchmark driver for strset.
 *
 * Loads a newline-separated word list from argv[1] (default:
 * /usr/share/dict/words), builds a second list of strings that are not in
 * the first, then times 16 phases of set / test / clear and prints the
 * result of normalize() for each.  Any operation that gives an unexpected
 * result calls abort().
 *
 * Returns 0 on success, 1 if the word list cannot be loaded or is empty.
 */
int main(int argc, char *argv[])
{
	size_t i, j, num;
	struct timeval start, stop;
	struct strset set;
	char **words, **misswords;
	const char *filename = argc > 1 ? argv[1] : "/usr/share/dict/words";
	char *contents;

	/* grab_file() gives NULL on failure: report it instead of handing
	 * NULL to strsplit(). */
	contents = grab_file(NULL, filename, NULL);
	if (!contents) {
		perror(filename);
		return 1;
	}
	words = strsplit(NULL, contents, "\n");
	if (!words) {
		fprintf(stderr, "%s: could not split into words\n", filename);
		return 1;
	}
	strset_init(&set);
	/* One less than the array length: the last slot is not a word. */
	num = talloc_array_length(words) - 1;
	printf("%zu words\n", num);
	if (num == 0) {
		/* Nothing to time, and every phase divides by num. */
		fprintf(stderr, "%s: no words to test\n", filename);
		return 1;
	}

	/* Append and prepend last char for miss testing. */
	misswords = talloc_array(words, char *, num);
	if (!misswords) {
		fprintf(stderr, "Could not allocate miss-word array\n");
		return 1;
	}
	for (i = 0; i < num; i++) {
		size_t len = strlen(words[i]);
		/* Empty words have no last character; use 'z' instead. */
		char lastc = len ? words[i][len - 1] : 'z';

		misswords[i] = talloc_asprintf(misswords, "%c%s%c%c",
					       lastc, words[i], lastc, lastc);
		if (!misswords[i]) {
			fprintf(stderr, "Could not allocate miss-word\n");
			return 1;
		}
	}

	printf("#01: Initial insert: ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++)
		strset_set(&set, words[i]);
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

#if 0
	printf("Nodes allocated: %zu (%zu bytes)\n",
	       allocated, allocated * sizeof(critbit0_node));
#endif

	printf("#02: Initial lookup (match): ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++)
		if (!strset_test(&set, words[i]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#03: Initial lookup (miss): ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++) {
		if (strset_test(&set, misswords[i]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* Lookups in file order can be very cache-friendly; also try
	 * jumping through the list in steps of 10007. */
	printf("#04: Initial lookup (random): ");
	fflush(stdout);
	start = time_now();
	for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
		if (!strset_test(&set, words[j]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#05: Initial delete all: ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++)
		if (!strset_clear(&set, words[i]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#06: Initial re-inserting: ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++)
		strset_set(&set, words[i]);
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* Phases #07 and #08 touch only the even-indexed entries but are
	 * still normalized by num, so they report half the per-operation
	 * cost.  Left as is so the figures stay comparable with earlier
	 * runs. */
	printf("#07: Deleting first half: ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i += 2)
		if (!strset_clear(&set, words[i]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#08: Adding (a different) half: ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i += 2)
		strset_set(&set, misswords[i]);
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* The set now holds the odd words and the even misswords. */
	printf("#09: Lookup after half-change (match): ");
	fflush(stdout);
	start = time_now();
	for (i = 1; i < num; i += 2)
		if (!strset_test(&set, words[i]))
			abort();
	for (i = 0; i < num; i += 2) {
		if (!strset_test(&set, misswords[i]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#10: Lookup after half-change (miss): ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i += 2)
		if (strset_test(&set, words[i]))
			abort();
	for (i = 1; i < num; i += 2) {
		if (strset_test(&set, misswords[i]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* Hashtables with delete markers can fill with markers over time.
	 * so do some changes to see how it operates in long-term. */
	printf("#11: Churn 1: ");
	fflush(stdout);
	start = time_now();
	for (j = 0; j < num; j += 2) {
		if (!strset_clear(&set, misswords[j]))
			abort();
		if (!strset_set(&set, words[j]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#12: Churn 2: ");
	fflush(stdout);
	start = time_now();
	for (j = 1; j < num; j += 2) {
		if (!strset_clear(&set, words[j]))
			abort();
		if (!strset_set(&set, misswords[j]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#13: Churn 3: ");
	fflush(stdout);
	start = time_now();
	for (j = 1; j < num; j += 2) {
		if (!strset_clear(&set, misswords[j]))
			abort();
		if (!strset_set(&set, words[j]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* Now it's back to normal... */
	printf("#14: Post-Churn lookup (match): ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++)
		if (!strset_test(&set, words[i]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	printf("#15: Post-Churn lookup (miss): ");
	fflush(stdout);
	start = time_now();
	for (i = 0; i < num; i++) {
		if (strset_test(&set, misswords[i]))
			abort();
	}
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	/* Same stepped (non-sequential) order as phase #04. */
	printf("#16: Post-Churn lookup (random): ");
	fflush(stdout);
	start = time_now();
	for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num)
		if (!strset_test(&set, words[j]))
			abort();
	stop = time_now();
	printf(" %zu ns\n", normalize(&start, &stop, num));

	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment